-
Notifications
You must be signed in to change notification settings - Fork 33
/
scrapetestlists.py
70 lines (58 loc) · 1.92 KB
/
scrapetestlists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from bs4 import BeautifulSoup
import requests
import re
import datetime
# Timestamp captured once, when the module is executed (naive local time).
CurrentTime = datetime.datetime.now()
# Calendar year of that timestamp; the scraping loop below iterates up to and
# including this year.
CurrentYear = CurrentTime.year
def get_num(x):
    """Return the integer spelled by the digit characters of *x*, in order.

    Every element of *x* for which ``isdigit()`` is false is discarded; the
    survivors are concatenated and handed to ``int``.  When nothing survives,
    ``int('')`` raises ``ValueError``.
    """
    digits = []
    for ch in x:
        if ch.isdigit():
            digits.append(ch)
    return int(''.join(digits))
def gamescrape (Year, timeout=30):
    """Download the match-results page for *Year* and return its results node.

    Args:
        Year: Year inserted into the ``id=`` query parameter of the URL.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the caller forever.

    Returns:
        ``contents[5]`` of the fourth-level ancestor of the 'Team 1' text
        node (presumably the body of the results table; confirm against the
        live page markup).

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        AttributeError: The page contains no 'Team 1' text node.
        IndexError: The ancestor node has fewer than six children.
    """
    url = 'http://stats.espncricinfo.com/ci/engine/records/team/match_results.html?class=1;id='+str(Year)+';type=year'
    r = requests.get(url, timeout=timeout)
    # Fail fast on 4xx/5xx instead of parsing an error page as if it were data.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'html.parser')
    # The 'Team 1' column heading anchors the results table; walk up four
    # levels from that text node and pick the sixth child.
    header = soup.find(text = 'Team 1')
    games = header.parent.parent.parent.parent.contents[5]
    return games
def _extract_details(year, row):
    """Build the output record for one row of a year's results table.

    The record starts with *year*, followed by whatever could be read from
    the row's child nodes.  Errors while reading the optional cells are
    ignored; errors while reading the trailing mandatory cells propagate to
    the caller, which skips the row.
    """
    details = [year]
    # Child nodes 1-3 and 6-13: keep those whose first child itself has a
    # first child, and silently skip every other node.
    for index in list(range(1, 4)) + list(range(6, 14)):
        try:
            details.append(row.contents[index].contents[0].contents[0])
        except Exception:
            continue
    try:
        details.append(row.contents[5].contents[0].contents[0])
        # The slice presumably strips the cell's opening and closing tags;
        # the offsets are tied to the page markup (TODO confirm).
        details.append(str(row.contents[7])[33:-5])
    except Exception:
        # Fallback when the cells above cannot be read.
        details.append('Draw')
        details.append('Draw')
    details.append(str(row.contents[11])[20:-5])
    # Digits of the sliced markup, e.g. /ci/engine/match/62397.html -> 62397
    details.append(get_num(str(row.contents[13].contents[0])[27:-14]))
    return details


# Scrape every year from 1877 through the current year and write one record
# per line to testmatchlist.txt.  Opening with 'w' truncates any previous
# output; the context manager guarantees the file is closed (and flushed)
# even if the run is interrupted.
# `except Exception` keeps the scrape best-effort for ordinary failures while
# still letting KeyboardInterrupt / SystemExit stop the script.
with open('testmatchlist.txt', 'w') as f:
    for x in range(1877, CurrentYear + 1):
        try:
            games = gamescrape(x)
            y = len(games)
        except Exception:
            # Year could not be fetched or parsed: skip it.
            continue
        z = (y - 1) // 2
        # Only the odd-indexed children are processed as result rows.
        for j in range(1, 2 * z, 2):
            try:
                Details = _extract_details(x, games.contents[j])
                print(Details)
                f.write(str(Details) + '\n')
            except Exception:
                # Malformed row: drop it and carry on with the next one.
                continue