ebay-dl.py
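'''
Download eBay search results for a given search term and write the listings
(name, price, status, shipping, free_returns, items_sold) to a JSON file,
or to a CSV file when --csv is passed.
'''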
import argparse
import csv
import json

import requests
from bs4 import BeautifulSoup
def parse_price(tag):
    '''Extract the digits of the price from a price tag (so '$12.34' becomes 1234),
    or return the string 'See price' when no numeric price is shown.'''
    tag = str(tag)
    if 'See price' in tag:
        return 'See price'
    if 'DEFAULT' in tag:
        # keep only the text before the 'DEFAULT' marker
        tag = tag.split('DEFAULT', 1)[0]
    numbers = ''
    for char in tag:
        if char in '1234567890':
            numbers += char
    return int(numbers) if numbers else None
def parse_shipping(text):
    '''Return the shipping cost digits as an int, or 0 for free shipping.'''
    if 'free shipping' in text.lower():
        return 0
    numbers = ''
    for char in text:
        if char in '1234567890':
            numbers += char
    return int(numbers) if numbers else 0
def parse_itemssold(text):
    '''Return the number of items sold, or 0 if the text is not a "sold" count.'''
    numbers = ''
    for char in text:
        if char in '1234567890':
            numbers += char
    if 'sold' in text and numbers:
        return int(numbers)
    return 0
# get command line arguments
parser = argparse.ArgumentParser(description='download ebay information and convert to JSON')
parser.add_argument('search_term')
parser.add_argument('--num_pages', type=int, default=10)
parser.add_argument('--csv', action='store_true')
args = parser.parse_args()
print('args.search_term=', args.search_term)
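# Example invocation (the search term here is just an illustration):
#   python3 ebay-dl.py "desk lamp" --num_pages 2 --csv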
# list of all ebay items
items = []

# loop over ebay webpages
for page_number in range(1, args.num_pages + 1):

    # build the url; _nkw carries the search term and _pgn the page number
    url = 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=' + args.search_term + '&_sacat=0&_pgn=' + str(page_number)

    # download the html
    r = requests.get(url)
    status = r.status_code
    html = r.text

    # process the html
    soup = BeautifulSoup(html, 'html.parser')

    # loop over items in page
    tags_items = soup.select('.s-item')
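    # The CSS class names below (.s-item__title, .s-item__price, etc.) come from
    # eBay's search-results markup and may change if eBay updates its page layout.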
    for tag_item in tags_items:

        # item name
        name = None
        tags_name = tag_item.select('.s-item__title')
        for tag in tags_name:
            name = tag.text

        # price
        price = None
        tags_price = tag_item.select('.s-item__price')
        for tag in tags_price:
            price = parse_price(tag)

        # item condition / status
        status = None
        tags_status = tag_item.select('.SECONDARY_INFO')
        for tag in tags_status:
            status = tag.text

        # shipping cost
        shipping = None
        tags_shipping = tag_item.select('.s-item__shipping, .s-item__freeXDays')
        for tag in tags_shipping:
            shipping = parse_shipping(tag.text)

        # free returns flag
        free_returns = False
        tags_freereturn = tag_item.select('.s-item__free-returns')
        for tag in tags_freereturn:
            free_returns = True

        # number of items sold
        items_sold = None
        tags_itemssold = tag_item.select('.s-item__hotness, .s-item__additionalItemhotness')
        for tag in tags_itemssold:
            items_sold = parse_itemssold(tag.text)

        item = {
            'name': name,
            'price': price,
            'status': status,
            'shipping': shipping,
            'free_returns': free_returns,
            'items_sold': items_sold,
        }

        # skip the "Shop on eBay" placeholder card
        if name and 'Shop on eBay' in name:
            continue
        items.append(item)
if args.csv:
    # write to csv file
    csv_columns = ['name', 'price', 'status', 'shipping', 'free_returns', 'items_sold']
    filenamecsv = args.search_term + '.csv'
    filenamecsv = filenamecsv.replace(' ', '_')
    with open(filenamecsv, 'w', newline='', encoding='utf-8') as f:
        ebaycsv = csv.DictWriter(f, fieldnames=csv_columns)
        ebaycsv.writeheader()
        for item in items:
            ebaycsv.writerow(item)
else:
    # write to json file
    filename = args.search_term + '.json'
    filename = filename.replace(' ', '_')
    with open(filename, 'w', encoding='utf-8') as fj:
        fj.write(json.dumps(items))