
Commit

PEP8
noqqe committed Sep 30, 2019
1 parent 07c2f51 commit eb3926a
Showing 1 changed file with 44 additions and 28 deletions.
72 changes: 44 additions & 28 deletions nichtparasoup.py
@@ -1,6 +1,13 @@
#!/usr/bin/env python

### import libraries
from crawler.giphy import Giphy
from crawler.fourchan import Fourchan
from crawler.instagram import Instagram
from crawler.ninegag import NineGag
from crawler.pr0gramm import Pr0gramm
from crawler.soupio import SoupIO
from crawler.reddit import Reddit
from os import path
import math
import random
@@ -27,7 +34,6 @@
from werkzeug.serving import run_simple



## import templates
import templates as tmpl

@@ -45,7 +51,7 @@
dest="config_file")
args = arg_parser.parse_args()

### configuration
# configuration
# init config parser
config = RawConfigParser()

@@ -72,7 +78,8 @@
hdlr = logging.handlers.SysLogHandler()
else:
hdlr = logging.FileHandler(config.get("Logging", "File"))
hdlr.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
hdlr.setFormatter(logging.Formatter(
'%(asctime)s %(levelname)s %(message)s'))

logger.addHandler(hdlr)
logger.setLevel(logverbosity.upper())
@@ -96,14 +103,7 @@
Crawler.request_headers({'User-Agent': user_agent})
Crawler.set_logger(logger)

### config the crawlers
from crawler.reddit import Reddit
from crawler.soupio import SoupIO
from crawler.pr0gramm import Pr0gramm
from crawler.ninegag import NineGag
from crawler.instagram import Instagram
from crawler.fourchan import Fourchan
from crawler.giphy import Giphy
# config the crawlers


def get_crawlers(configuration, section):
@@ -151,7 +151,8 @@ def get_crawlers(configuration, section):
crawler_sites.append(url_quote_plus(factorPair))
continue

factorPair_parts = [factorPairPart.strip() for factorPairPart in factorPair.split(factor_separator)]
factorPair_parts = [factorPairPart.strip(
) for factorPairPart in factorPair.split(factor_separator)]

jkowalleck (Member) commented on Oct 2, 2019: ugly
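A less awkward PEP8-compliant wrap (just a sketch of an alternative, not what this commit produces) would break after the opening bracket instead of inside the strip() call:

# hypothetical alternative layout, not part of this commit
factorPair_parts = [
    factorPairPart.strip()
    for factorPairPart in factorPair.split(factor_separator)
]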

if not factorPair_parts or not len(factorPair_parts) == 2:
continue
@@ -168,26 +169,32 @@ def get_crawlers(configuration, section):
crawler_class_name, repr(crawler_sites), repr(factors[crawler_class_name])))

if crawler_class == Reddit:
crawler_uris = {site: "https://www.reddit.com/r/%s" % site for site in crawler_sites}
crawler_uris = {site: "https://www.reddit.com/r/%s" %
site for site in crawler_sites}
elif crawler_class == NineGag:
crawler_uris = {site: "https://9gag.com/%s" % site for site in crawler_sites}
crawler_uris = {site: "https://9gag.com/%s" %
site for site in crawler_sites}
elif crawler_class == Pr0gramm:
crawler_uris = {crawler_sites[0] : "https://pr0gramm.com/api/items/get"}
crawler_uris = {crawler_sites[0]: "https://pr0gramm.com/api/items/get"}
elif crawler_class == SoupIO:
crawler_uris = {site: ("http://www.soup.io/%s" if site in ["everyone"] # public site
else "http://%s.soup.io") % site # user site
for site in crawler_sites}
elif crawler_class == Instagram:
crawler_uris = {site: "https://instagram.com/%s" % site for site in crawler_sites}
crawler_uris = {site: "https://instagram.com/%s" %
site for site in crawler_sites}
elif crawler_class == Fourchan:
crawler_uris = {site: "https://boards.4chan.org/%s/" % site for site in crawler_sites}
crawler_uris = {site: "https://boards.4chan.org/%s/" %
site for site in crawler_sites}
elif crawler_class == Giphy:
crawler_uris = {site: "https://api.giphy.com/v1/gifs/search?q=%s" % site for site in crawler_sites}
crawler_uris = {
site: "https://api.giphy.com/v1/gifs/search?q=%s" % site for site in crawler_sites}

if crawler_class_name not in crawlers:
crawlers[crawler_class_name] = {}

crawlers[crawler_class_name] = {site: crawler_class(crawler_uris[site], site) for site in crawler_uris}
crawlers[crawler_class_name] = {site: crawler_class(
crawler_uris[site], site) for site in crawler_uris}

return crawlers, factors

@@ -214,7 +221,8 @@ def cache_fill_loop():
sources[crawler][site].crawl()
info = Crawler.info()
except Exception as e:
logger.error("Error in crawler %s - %s: %s" % (crawler, site, e))
logger.error("Error in crawler %s - %s: %s" %
(crawler, site, e))
break

# sleep for non-invasive threading ;)
@@ -226,15 +234,19 @@ def cache_get():
return Crawler.get_image()

# get status of cache


def cache_status_dict():
info = Crawler.info()
return {
"crawler" : Crawler.info() ,
"factors" : factors ,
"min_cache_imgs_before_refill" : min_cache_imgs_before_refill ,
"crawler": Crawler.info(),
"factors": factors,
"min_cache_imgs_before_refill": min_cache_imgs_before_refill,
}

# print status of cache


def cache_status_text():
status = cache_status_dict()
info = status['crawler']
@@ -243,7 +255,8 @@ def cache_status_text():
bar_repr_refill = status['min_cache_imgs_before_refill'] / bar_reps

msg = "images cached: %d (%d bytes) - already crawled: %d (%d bytes)" % \
(info["images"], info["images_size"], info["blacklist"], info["blacklist_size"])
(info["images"], info["images_size"],
info["blacklist"], info["blacklist_size"])
logger.info(msg)

for crawler in sources:
@@ -264,12 +277,15 @@ def cache_status_text():
else:
bar += "*"

sitestats = ("%15s - %-15s with factor %4.1f: %2d Images " + bar) % (crawler, site, factor, count)
sitestats = ("%15s - %-15s with factor %4.1f: %2d Images " +
bar) % (crawler, site, factor, count)
logger.info(sitestats)
msg += "\r\n" + sitestats
return msg

# print imagelist


def show_imagelist():
return "\n".join(Crawler.show_imagelist())

@@ -301,7 +317,7 @@ def reset():
return "%i000" % (call_reset_timeout - time_since_last_call)


### werkzeug webserver
# werkzeug webserver
# class with mapping to cache_* functions above
class NichtParasoup(object):
# init webserver with routing
@@ -366,7 +382,7 @@ def on_reset(self, request):
return Response(reset())


### runtime
# runtime
# main function how to run
# on start-up, fill the cache and get up the webserver
if __name__ == "__main__":
@@ -377,7 +393,7 @@ def on_reset(self, request):
cache_fill_thread.start()
except (KeyboardInterrupt, SystemExit):
# end the cache filler thread properly
min_cache_imgs_before_refill -1
min_cache_imgs_before_refill - 1

# give the cache_fill some time in advance
time.sleep(1.337)
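Worth noting: both the old and the new form of the threshold line above are bare expressions with no effect, so nothing actually changes when the thread is interrupted. Judging from the surrounding comment, the intent was presumably to lower the threshold so the fill loop stops refilling; one possible fix (an assumption on our part, not something this commit touches) would be:

# assumed intent, not part of this commit: drop the threshold so the fill loop stops refilling
min_cache_imgs_before_refill = -1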

0 comments on commit eb3926a
