Skip to content

Commit

Permalink
Cleanup python formatting for PEP 8
Browse files Browse the repository at this point in the history
  • Loading branch information
jxu committed Aug 26, 2019
1 parent 092050a commit 474b57d
Showing 1 changed file with 18 additions and 19 deletions.
37 changes: 18 additions & 19 deletions combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,43 @@
from os import sep
from PyPDF2 import PdfFileMerger

# Directory where per-problem PDFs live and downloaded extras are written.
RENDER_DIR = "render"
# When True, also fetch each problem's .txt attachments / animated GIFs.
DOWNLOAD_EXTRA_FLAG = True
# Base URL used to resolve relative hrefs/img srcs found on problem pages.
SITE_MAIN = "https://projecteuler.net/"


render_dir = "render"
download_extra_flag = True

def download_extra(url):
    """Find and download a problem page's extra assets into RENDER_DIR.

    Fetches *url*, scans it for ``.txt`` attachment links and ``.gif``
    images, and saves any ``.txt`` file and any *animated* GIF into
    RENDER_DIR. Static GIFs and the blank ``spacer.gif`` are skipped.

    Parameters:
        url (str): absolute URL of a Project Euler problem page.

    Side effects: performs HTTP GETs and writes files under RENDER_DIR.
    """
    # Not async for now to keep rate of requests low
    from bs4 import BeautifulSoup
    import requests
    from os.path import basename
    from PIL import Image
    from io import BytesIO

    print("Searching", url)
    content = requests.get(url).content
    soup = BeautifulSoup(content, "lxml")

    # Text attachments: any <a href="...txt">; hrefs are site-relative.
    for a in soup.find_all('a', href=True):
        href = a["href"]
        if href.endswith(".txt"):
            print("Found and writing", href)
            r = requests.get(SITE_MAIN + href)
            # Save under the link's visible text, which is the file name.
            with open(RENDER_DIR + sep + a.text, 'wb') as f:
                f.write(r.content)

    # GIF images: download, then keep only the animated ones.
    for img in soup.find_all("img"):
        img_src = img["src"]
        # Ignore spacer.gif (blank)
        if img_src.endswith(".gif") and "spacer" not in img_src:
            print("Found", img_src)
            r = requests.get(SITE_MAIN + img_src)
            # Only write animated GIFs
            if Image.open(BytesIO(r.content)).is_animated:
                print("Writing", img_src)
                with open(RENDER_DIR + sep + basename(img_src), 'wb') as f:
                    f.write(r.content)


Expand All @@ -48,22 +49,20 @@ def main():
merger = PdfFileMerger()

for problem_id in range(problem_id_start, problem_id_end+1):
pdf_path = render_dir + sep + str(problem_id) + ".pdf"
pdf_path = RENDER_DIR + sep + str(problem_id) + ".pdf"
merger.append(pdf_path)

merger.write(render_dir + sep + "problems.pdf")
merger.write(RENDER_DIR + sep + "problems.pdf")
print("Merged PDFs")


if download_extra_flag:
if DOWNLOAD_EXTRA_FLAG:
url_list = []
for problem_id in range(problem_id_start, problem_id_end+1):
url_list.append("https://projecteuler.net/problem=" + str(problem_id))
url_list.append(SITE_MAIN + "problem=" + str(problem_id))

for url in url_list:
download_extra(url)


# Entry point: merge rendered PDFs (and optionally fetch extras) when run
# as a script; importing this module does nothing.
if __name__ == "__main__":
    main()

0 comments on commit 474b57d

Please sign in to comment.