This repository has been archived by the owner on Mar 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpopulate_artsy_db.py
executable file
·86 lines (78 loc) · 2.74 KB
/
populate_artsy_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# TODO: Build in support for updating an already-existing DB:
# * Add new artworks
# * Handle missing category
# NOTE: 27577 records stored as of 03/07/2021
import json
import os.path
from datetime import datetime
from os import getenv
from time import sleep
from ratelimit import limits, sleep_and_retry
import requests
from unqlite import UnQLite
# Base URL of the Artsy API; endpoint paths are appended to it.
ARTSY_API = "https://api.artsy.net/api"
# Headers sent with every Artsy request. The token is read from the
# ARTSY_TOKEN environment variable; getenv() yields None when it is unset.
ARTSY_HEADERS = {
"X-Xapp-Token": getenv("ARTSY_TOKEN")
}
# UnQLite database file "artsy.db", located in the same directory as this
# script (symlinks resolved via realpath). Opened at import time.
DB = UnQLite(f"{os.path.dirname(os.path.realpath(__file__))}/artsy.db")
# Artsy API rate limit says 5 requests per second. The window is kept at one
# second so that at most 5 calls can be issued back-to-back; a 25-calls-per-5s
# window has the same average but permits bursts of 25.
@sleep_and_retry
@limits(calls=5, period=1)
def artsy_request(url, timeout=30):
    """Send a rate-limited GET request to the Artsy API.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests would block indefinitely on a stalled
            connection.

    Returns:
        The ``requests.Response`` for the call. The HTTP status is not
        checked here; callers must inspect it themselves.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires (``Timeout`` is a subclass).
    """
    return requests.get(url, headers=ARTSY_HEADERS, timeout=timeout)
def create_populate():
    """Create the "artworks" collection and fill it from the Artsy API.

    Walks the paginated ``/artworks`` endpoint, following each page's
    ``_links.next.href`` until no further page is advertised, and stores
    every page's ``_embedded.artworks`` list in the collection.

    Does nothing (apart from printing a notice) when the collection already
    exists. Stops early, keeping whatever has been stored so far, when a
    request raises or when a page keeps failing after several retries.
    """
    max_attempts = 5   # consecutive failed responses tolerated per page
    retry_delay = 75   # seconds to wait before retrying a failed page

    artworks = DB.collection("artworks")
    if not artworks.create():
        print("Collection already exists, not proceeding with create/populate")
        return

    artworks_page = f"{ARTSY_API}/artworks"
    failed_attempts = 0
    while artworks_page:
        print(artworks_page)
        try:
            res = artsy_request(artworks_page)
        except requests.exceptions.RequestException as err:
            print(f"[{datetime.now()}] {err}")
            break

        # A throttled or failed response carries no page data; parsing it
        # would raise, so wait and retry the same page instead.
        if res.status_code != 200 or "Retry later" in res.text:
            failed_attempts += 1
            if failed_attempts >= max_attempts:
                print(
                    f"[{datetime.now()}] Giving up on {artworks_page} "
                    f"after {max_attempts} failed attempts"
                )
                break
            print(f"Previous API call failed, sleeping for {retry_delay} seconds")
            sleep(retry_delay)
            continue
        failed_attempts = 0

        page_dict = json.loads(res.text)
        page_artworks = page_dict.get("_embedded", {}).get("artworks")
        if page_artworks:
            artworks.store(page_artworks)

        # The last page has no "next" link; None ends the loop.
        artworks_page = page_dict.get("_links", {}).get("next", {}).get("href")
def _artists_response_with_retry(url, max_attempts=5, delay=75):
    """Fetch *url* via artsy_request, retrying failed responses a bounded number of times.

    A response counts as failed when its status is not 200 or its body
    contains "Retry later".

    Returns:
        The successful response, or None when every attempt failed.

    Raises:
        requests.exceptions.RequestException: Propagated from artsy_request.
    """
    for attempt in range(1, max_attempts + 1):
        print(url)
        res = artsy_request(url)
        if res.status_code == 200 and "Retry later" not in res.text:
            return res
        if attempt < max_attempts:
            print(f"Previous API call failed, sleeping for {delay} seconds")
            sleep(delay)
    return None


def add_artists():
    """Add an "artists" field to every stored artwork that lacks one.

    For each artwork that has ``_links`` but no ``artists`` key, the
    artwork's artists link is fetched and the artwork is updated in place
    with the list of artist names, or ``None`` when the API reports no
    artists.

    Artworks without an artists link, or whose link keeps failing, are
    skipped and remain without an ``artists`` key, so a later run picks them
    up again. A request exception stops processing entirely.
    """
    artworks = DB.collection("artworks")
    for position, artwork in enumerate(artworks):
        if "_links" not in artwork or "artists" in artwork:
            continue

        artists_url = artwork["_links"].get("artists", {}).get("href")
        if not artists_url:
            print(f"[{datetime.now()}] Record {position} has no artists link, skipping")
            continue

        try:
            res = _artists_response_with_retry(artists_url)
        except requests.exceptions.RequestException as err:
            print(f"[{datetime.now()}] {err}")
            break
        if res is None:
            print(f"[{datetime.now()}] Giving up on {artists_url}, skipping")
            continue

        artists = json.loads(res.text)["_embedded"]["artists"]
        # An empty artist list is recorded as None rather than [].
        artwork["artists"] = [artist["name"] for artist in artists] or None

        # Prefer the record id stored on the document itself (unqlite-python
        # exposes it as "__id" -- confirm against the installed version);
        # fall back to the iteration position when it is absent.
        record_id = artwork.get("__id", position)
        artworks.update(record_id, artwork)
# Script entry point: build the artworks collection first, then enrich the
# stored artworks with their artist names.
if __name__ == "__main__":
    create_populate()
    add_artists()