-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
91 lines (72 loc) · 2.98 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os, shutil
from jinja2 import Template, Environment, FileSystemLoader
from bs4 import BeautifulSoup
import requests
import json
class SiteGenerator(object):
    """Generate a static page of Bandcamp players from a public wishlist.

    Creating an instance runs the whole build immediately, in this order:
    the wishlist page is scraped for player data, ``./public`` is emptied,
    ``template/static`` is copied into it, and ``_layout.html`` is rendered
    to ``public/index.html``.  The download happens first, so a failed fetch
    raises before any previously generated output is removed.

    Attributes:
        frames: Empty list kept from the original implementation; nothing in
            this class reads it.
        iframes: HTML ``<iframe>`` snippets, one per wishlist entry, filled
            in by ``fetch_iframes``.
        env: Jinja2 environment that loads templates from ``template/``.
    """

    # Wishlist page to scrape. Replace USERNAME with your Bandcamp username
    # (a public wishlist URL is all that is needed, not credentials).
    wishlist_url = 'https://bandcamp.com/USERNAME/wishlist'

    # Seconds to wait for the wishlist page; without a timeout ``requests``
    # can block forever on a stalled connection.
    request_timeout = 30

    # Embedded-player markup; ``{album}`` and ``{track}`` are filled per item.
    _IFRAME_TEMPLATE = (
        '<iframe style="border: 0; width: 100%; height: 120px;" '
        'src="https://bandcamp.com/EmbeddedPlayer/album={album}'
        '/size=large/bgcol=333333/linkcol=0687f5/tracklist=false'
        '/artwork=small/track={track}/transparent=true/" seamless></iframe>'
    )

    def __init__(self, wishlist_url=None):
        """Run the full build.

        Args:
            wishlist_url: Optional wishlist page URL overriding the
                class-level ``wishlist_url`` for this instance.
        """
        if wishlist_url is not None:
            self.wishlist_url = wishlist_url
        self.frames = []
        # Initialised here so render_page always has something to read.
        self.iframes = []
        self.env = Environment(loader=FileSystemLoader('template'))
        self.fetch_iframes()
        self.empty_public()
        self.copy_static()
        self.render_page()

    @classmethod
    def _build_iframe(cls, album_id, track_id):
        """Return the embedded-player ``<iframe>`` HTML for one wishlist entry."""
        return cls._IFRAME_TEMPLATE.format(album=album_id, track=track_id)

    @staticmethod
    def _extract_wishlist(blobs):
        """Return the wishlist mapping found in the ``data-blob`` payloads.

        Args:
            blobs: Iterable of raw ``data-blob`` attribute strings.

        Returns:
            The dict stored under ``item_cache -> wishlist``.  When several
            blobs carry one, the last wins, matching the original code which
            only ever used the final blob.

        Raises:
            LookupError: If no blob contains a wishlist mapping.
        """
        wishlist = None
        for raw in blobs:
            try:
                payload = json.loads(raw)
            except ValueError:
                # Not JSON; cannot be the blob we are looking for.
                continue
            item_cache = payload.get('item_cache') if isinstance(payload, dict) else None
            if isinstance(item_cache, dict) and isinstance(item_cache.get('wishlist'), dict):
                wishlist = item_cache['wishlist']
        if wishlist is None:
            raise LookupError("No wishlist data found in the page's data-blob attributes")
        return wishlist

    def fetch_iframes(self):
        """Request the wishlist page, saving player iframes in self.iframes.

        Raises:
            requests.RequestException: If the download fails, times out, or
                returns an HTTP error status.
            LookupError: If the page carries no wishlist data.
        """
        response = requests.get(self.wishlist_url, timeout=self.request_timeout)
        response.raise_for_status()

        # Every tag with a data-blob attribute is a candidate JSON payload.
        soup = BeautifulSoup(response.content, 'html.parser')
        blobs = [tag['data-blob'] for tag in soup.find_all(True, {'data-blob': True})]

        wishlist = self._extract_wishlist(blobs)
        self.iframes = [
            self._build_iframe(entry['album_id'], entry['tralbum_id'])
            for entry in wishlist.values()
        ]

    def empty_public(self):
        """Delete any previous ``./public`` output and recreate it empty."""
        try:
            # On a first run there is nothing to delete; still create the
            # directory so later steps have somewhere to write.
            if os.path.isdir('./public'):
                shutil.rmtree('./public')
            os.makedirs('./public', exist_ok=True)
        except OSError:
            print("Could not clean up old files")

    def copy_static(self):
        """ Copy static files to public directory """
        try:
            shutil.copytree('template/static', 'public/static')
        except (OSError, shutil.Error):
            print("Error copying static files.")

    def render_page(self):
        """Render ``_layout.html`` with the iframes to ``public/index.html``."""
        print("Rendering page to static file.")
        template = self.env.get_template('_layout.html')
        # Render before opening the file so a template error cannot leave a
        # truncated index.html behind.
        html = template.render(
            title="The Goings On",
            iframes=self.iframes,
        )
        os.makedirs('public', exist_ok=True)
        # Jinja2 loads templates as UTF-8 by default; write the same encoding
        # rather than relying on the platform default.
        with open('public/index.html', 'w', encoding='utf-8') as out:
            out.write(html)
def _main():
    """Build the site once; constructing SiteGenerator runs every step."""
    SiteGenerator()


# Only build when executed as a script, not when imported as a module.
if __name__ == "__main__":
    _main()