-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawling.py
94 lines (81 loc) · 2.45 KB
/
crawling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#-*-coding:utf-8-*-
"""
Data source: KBReport.com
For private study only.
"""
import locale
import requests
import urllib
from bs4 import BeautifulSoup
# Korean position labels matched as substrings of the scraped position text.
# Right fielder, left fielder, center fielder, and the generic "outfielder".
outfielder = (
    "우익수",
    "좌익수",
    "중견수",
    "외야수",
)
catcher = "포수"      # catcher
shortstop = "유격수"  # shortstop

# Korean batting-side markers matched as substrings of the scraped hand text.
left = "좌타"   # bats left
right = "우타"  # bats right
both = "양타"   # switch hitter
def get_datalist(url=None):
    """Fetch a player detail page and return its parsed player info.

    Args:
        url: Address of the player detail page to fetch. When omitted or
            empty, the page that was previously hard-coded here is used.

    Returns:
        Whatever ``parse_html`` returns for the downloaded page.
    """
    # Previously the ``url`` argument was ignored and this address was always
    # fetched; it is kept as the fallback so argument-less use still works.
    default_url = "http://www.kbreport.com/player/detail/1501"
    target = url or default_url
    return parse_html(get_html(target))
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled server would block the caller
            indefinitely.

    Returns:
        The response text when the status code is 200, otherwise an empty
        string.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failure, timeout, ...).
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code == 200:
        return resp.text
    # Any non-200 status is reported to the caller as "no content".
    return ""
def hand_change(hand):
    """Map the scraped hand text to a normalized Korean hand label.

    The module-level markers ``left``, ``right`` and ``both`` are looked up
    as substrings of *hand*, in that order; the first one found decides the
    result.

    Returns:
        "왼손", "오른손" or "양손" for the first marker found, or an empty
        string when none of the markers occurs in *hand*.
    """
    # Order is significant: it mirrors the left -> right -> both priority.
    lookup = (
        (left, "왼손"),
        (right, "오른손"),
        (both, "양손"),
    )
    for marker, label in lookup:
        if marker in hand:
            return label
    return ""
def position_change(position):
    """Collapse the scraped position text into one of four position groups.

    Checks, in order, whether the module-level ``catcher`` label, the
    ``shortstop`` label, or any of the ``outfielder`` labels occurs as a
    substring of *position*.

    Returns:
        "포수" for a catcher, "유격수" for a shortstop, "외야수" when any
        outfielder label matches, and "내야수" for everything else.
    """
    if catcher in position:
        return "포수"
    if shortstop in position:
        return "유격수"
    if any(label in position for label in outfielder):
        return "외야수"
    # Anything that is not a catcher, shortstop or outfielder falls through
    # to the infielder group.
    return "내야수"
def money_change(payment, money_type):
    """Strip the currency symbol and separators from a scraped money string.

    Args:
        payment: Raw money text, expected to contain either "₩" or "$"
            followed by a comma-separated number.
        money_type: Label of the money field; accepted for interface
            compatibility but not used by this function.

    Returns:
        The digits that follow the currency symbol with commas removed and
        the trailing characters cut off (four for "₩", one for "$"). An
        empty string is returned when *payment* is empty/None or contains
        neither currency symbol.
    """
    # Truthiness check instead of ``is not ''``: identity comparison with a
    # string literal is unreliable and emits a SyntaxWarning on Python 3.8+.
    if not payment:
        return ""

    if "₩" in payment:
        # Presumably dropping four digits converts won to units of
        # 10,000 won -- TODO confirm against the site's format.
        symbol, trailing = "₩", 4
    elif "$" in payment:
        # The last character is dropped for dollar amounts; the reason is
        # not evident from this file -- TODO confirm.
        symbol, trailing = "$", 1
    else:
        # Unrecognized format: previously this raised IndexError.
        return ""

    digits = payment.split(symbol)[1].replace(",", "")
    return digits[:-trailing]
def parse_html(html):
    """Extract the player info tuple from a player detail page.

    Args:
        html: Markup of the page, as returned by ``get_html``.

    Returns:
        A list holding one ``(age, hand, position, contract, payment)``
        tuple, or an empty list when the page has no
        ``div.player-info-box`` element (for example when *html* is the
        empty string ``get_html`` returns for a failed request).

    Raises:
        AttributeError: If the info box exists but one of the expected
            ``player-info-N`` spans is missing inside it.
    """
    player_info_ret = []
    soup = BeautifulSoup(html, 'html.parser')
    player_info_area = soup.find("div", {"class": "player-info-box"})
    if player_info_area is None:
        # Nothing to parse; previously this crashed with AttributeError.
        return player_info_ret

    def span_text(css_class):
        """Return the stripped text of the span with the given class."""
        return player_info_area.find("span", {"class": css_class}).text.strip()

    age = span_text("player-info-1")
    hand = hand_change(span_text("player-info-2"))
    position = position_change(span_text("player-info-4"))
    contract = money_change(span_text("player-info-7"), "계약금")
    payment = money_change(span_text("player-info-8"), "연봉")

    player_info_ret.append((age, hand, position, contract, payment))
    return player_info_ret