From 474b57de931158ab822f4ae6c46a9cc19e957703 Mon Sep 17 00:00:00 2001
From: jxu <7989982+jxu@users.noreply.github.com>
Date: Mon, 26 Aug 2019 15:35:33 -0400
Subject: [PATCH] Cleanup python formatting for PEP 8

---
 combine.py | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/combine.py b/combine.py
index 2e50ef8..8e0d373 100644
--- a/combine.py
+++ b/combine.py
@@ -2,30 +2,31 @@
 from os import sep
 from PyPDF2 import PdfFileMerger
 
+RENDER_DIR = "render"
+DOWNLOAD_EXTRA_FLAG = True
+SITE_MAIN = "https://projecteuler.net/"
 
-render_dir = "render"
-download_extra_flag = True
-
 def download_extra(url):
-    '''Tries to find a .txt attachment or .gif and download it to render_dir.'''
-    # TODO async request
+    """Finds if available a .txt attachment or animated .gif and downloads it
+    to RENDER_DIR
+    """
+    # Not async for now to keep rate of requests low
     from bs4 import BeautifulSoup
     import requests
     from os.path import basename
     from PIL import Image
     from io import BytesIO
 
-    site_main = "http://projecteuler.net/"
-
     print("Searching", url)
     content = requests.get(url).content
     soup = BeautifulSoup(content, "lxml")
 
     for a in soup.find_all('a', href=True):
-        if a["href"].endswith(".txt"):
-            print("Found", a["href"])
-            r = requests.get(site_main + a["href"])
-            with open(render_dir + sep + a.text, 'wb') as f:
+        href = a["href"]
+        if href.endswith(".txt"):
+            print("Found and writing", href)
+            r = requests.get(SITE_MAIN + href)
+            with open(RENDER_DIR + sep + a.text, 'wb') as f:
                 f.write(r.content)
 
     for img in soup.find_all("img"):
@@ -33,11 +34,11 @@ def download_extra(url):
         # Ignore spacer.gif (blank)
         if img_src.endswith(".gif") and "spacer" not in img_src:
             print("Found", img_src)
-            r = requests.get(site_main + img_src)
+            r = requests.get(SITE_MAIN + img_src)
             # Only write animated GIFs
             if Image.open(BytesIO(r.content)).is_animated:
                 print("Writing", img_src)
-                with open(render_dir + sep + basename(img_src), 'wb') as f:
+                with open(RENDER_DIR + sep + basename(img_src), 'wb') as f:
                     f.write(r.content)
 
 
@@ -48,17 +49,16 @@ def main():
     merger = PdfFileMerger()
 
     for problem_id in range(problem_id_start, problem_id_end+1):
-        pdf_path = render_dir + sep + str(problem_id) + ".pdf"
+        pdf_path = RENDER_DIR + sep + str(problem_id) + ".pdf"
         merger.append(pdf_path)
 
-    merger.write(render_dir + sep + "problems.pdf")
+    merger.write(RENDER_DIR + sep + "problems.pdf")
     print("Merged PDFs")
 
-
-    if download_extra_flag:
+    if DOWNLOAD_EXTRA_FLAG:
         url_list = []
         for problem_id in range(problem_id_start, problem_id_end+1):
-            url_list.append("https://projecteuler.net/problem=" + str(problem_id))
+            url_list.append(SITE_MAIN + "problem=" + str(problem_id))
 
         for url in url_list:
             download_extra(url)
@@ -66,4 +66,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
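
A possible follow-up to the constants refactor, not part of this patch: building
paths with os.path.join instead of RENDER_DIR + sep + ... would drop the
"from os import sep" import entirely. A minimal sketch:

    # Sketch only; RENDER_DIR matches the constant introduced above.
    import os

    RENDER_DIR = "render"
    for problem_id in range(1, 4):
        # os.path.join picks the right separator for the platform
        pdf_path = os.path.join(RENDER_DIR, str(problem_id) + ".pdf")
        print(pdf_path)  # render/1.pdf ... on POSIX, render\1.pdf on Windows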
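
A caveat around the is_animated check this patch keeps: Pillow only defines
is_animated on image plugins that support multiple frames, so bytes that
decode to a single-frame format (or fail to decode at all) can raise instead
of returning False. A defensive sketch, assuming only Pillow; is_animated_gif
is a hypothetical helper, not part of combine.py:

    from io import BytesIO
    from PIL import Image

    def is_animated_gif(data):
        """Return True only if the bytes decode to a multi-frame image."""
        try:
            im = Image.open(BytesIO(data))
        except OSError:  # response body was not a decodable image
            return False
        # is_animated is absent on single-frame plugins; default to False
        return getattr(im, "is_animated", False)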
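
For reference, the PyPDF2 merge pattern that main() relies on, as a
standalone sketch (the file names are illustrative, and the explicit close()
is a hygiene suggestion rather than something this patch adds):

    from PyPDF2 import PdfFileMerger

    merger = PdfFileMerger()
    for path in ("render/1.pdf", "render/2.pdf"):
        merger.append(path)  # append() accepts a path or an open file object
    merger.write("render/problems.pdf")
    merger.close()  # releases the source file handles held by append()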