-
Notifications
You must be signed in to change notification settings - Fork 0
/
web_scraper.py
65 lines (42 loc) · 1.47 KB
/
web_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import requests
from bs4 import BeautifulSoup
import pandas as pd
import items
import file_io as fio
import gspread_pandas
def get_distributer(url):
    """Return the distributor name taken from the host part of *url*.

    Only the host name is inspected, so dots in the path or query string
    cannot affect the result.  The host is lower-cased before splitting.

    * Host with three or more labels (``www.digikey.com``): the second
      label is returned (``'digikey'``).
    * Host with one or two labels (``digikey.com``, ``localhost``): the
      first label is returned.
    * No recognisable host (e.g. an empty string): ``''`` is returned
      instead of raising ``IndexError``.

    Multi-part public suffixes without a subdomain (``digikey.co.uk``)
    still yield the second label (``'co'``).

    Args:
        url: Product URL as a string, with or without a scheme.

    Returns:
        The lower-cased distributor label, or ``''`` if none can be found.
    """
    # Imported locally so this helper does not depend on file-level imports.
    from urllib.parse import urlsplit

    cleaned = url.strip()
    parts = urlsplit(cleaned)
    if not parts.netloc:
        # Scheme-less input such as "www.digikey.com/..." is parsed as a bare
        # path; re-parse it as a network location so the host is recognised.
        parts = urlsplit('//' + cleaned)

    host = parts.hostname or ''
    labels = host.split('.')
    if len(labels) >= 3:
        return labels[1]
    return labels[0]
# Config for google service account credentials
c = gspread_pandas.conf.get_config('google_api_config', 'google_secret.json')

# --- Structures for supported distributors --- #
# Each dict maps a column name to a list of values; the lists are filled in
# lockstep (one entry per URL) so the dict can be turned into a DataFrame.
unsupported_items = {
    'Unsupported Distributors': [],
}

digikey_items = {
    'Link': [],
    'Part': [],
    'Price': [],
    'Description': [],
    'Detailed Description': [],
}

# --- Load in links --- #
spreadsheet = gspread_pandas.Spread('GLV BOM', config=c)
urls = fio.read_urls(spreadsheet)

# --- Get distributors --- #
data = []  # Final list of all dataframes

for url in urls:
    dstbtr = get_distributer(url)

    if dstbtr == 'digikey':
        digi_item = items.DigiKeyItem(url)
        # Fetch the descriptions once and index the result, rather than
        # calling get_descriptions() separately for each column.
        descriptions = digi_item.get_descriptions()

        digikey_items['Link'].append(url)
        digikey_items['Part'].append(digi_item.get_part_name())
        digikey_items['Price'].append(digi_item.get_price())
        digikey_items['Description'].append(descriptions[0])
        digikey_items['Detailed Description'].append(descriptions[1])
    else:
        # Distributor not supported.  The key must match the one declared in
        # unsupported_items above; using 'Link' here raised KeyError.
        unsupported_items['Unsupported Distributors'].append(url)
        print("Distributor not supported.")

# --- Save data --- #
# NOTE(review): only the DigiKey table is written back; unsupported_items is
# collected but never saved -- confirm whether that is intended.
data.append(pd.DataFrame(digikey_items))
fio.write_data(data, spreadsheet)