Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Jamee999 authored Mar 20, 2018
1 parent a5ba81f commit 14d3336
Show file tree
Hide file tree
Showing 21 changed files with 6,880 additions and 4,168 deletions.
4,008 changes: 2,009 additions & 1,999 deletions alldata.txt

Large diffs are not rendered by default.

563 changes: 282 additions & 281 deletions australiadata.txt

Large diffs are not rendered by default.

119 changes: 60 additions & 59 deletions bangladeshdata.txt
Original file line number Diff line number Diff line change
@@ -1,86 +1,87 @@
['Al Sahariar', 21.33, 387.22, 'OpenBat', 2000, 2003, 0.5139202407825433, 1.4, 0]
['Aminul Islam', 19.72, 195.19, 'Bat', 2000, 2002, 0.34868421052631576, 1.5253890253890252, 0]
['Habibul Bashar', 29.15, 188.8, 'Bat', 2000, 2008, 0.6027888446215139, 1.5088304825476289, 18]
['Hasibul Hossain', 7.78, 50.15, 'PartFast', 2000, 2001, 0.782258064516129, 1.5042149631190727, 0]
['Khaled Mashud', 17.97, 377.6, 'WK', 2000, 2007, 0.34066731141199225, 1.4, 12]
['Mehrab Hossain', 14.55, 134.59, 'OpenBat', 2000, 2003, 0.3265582655826558, 0.8361204013377926, 0]
['Akram Khan', 15.96, 387.22, 'Bat', 2000, 2003, 0.37755102040816324, 1.2, 0]
['Al Sahariar', 21.33, 387.22, 'OpenBat', 2000, 2003, 0.5139202407825433, 1.2, 0]
['Aminul Islam', 19.72, 195.19, 'Bat', 2000, 2002, 0.34868421052631576, 1.2, 0]
['Habibul Bashar', 29.15, 188.8, 'Bat', 2000, 2008, 0.6027888446215139, 1.2, 18]
['Hasibul Hossain', 7.78, 50.15, 'Fast', 2000, 2001, 0.782258064516129, 1.2, 0]
['Khaled Mashud', 17.97, 377.6, 'WK', 2000, 2007, 0.34066731141199225, 1.2, 12]
['Mehrab Hossain', 14.55, 50.69, 'OpenBat', 2000, 2003, 0.3265582655826558, 0.8361204013377926, 0]
['Mohammad Rafique', 17.54, 39.23, 'Spin', 2000, 2008, 0.6496932515337424, 0.9140161096460542, 0]
['Naimur Rahman', 12.63, 54.04, 'Spin', 2000, 2002, 0.4585152838427948, 1.1017451971274832, 7]
['Ranjan Das', 7.57, 67.23, 'PartFast', 2000, 2000, 0.06451612903225806, 1.1207970112079702, 0]
['Shahriar Hossain', 18.66, 387.22, 'OpenBat', 2000, 2004, 0.41596638655462187, 1.4, 0]
['Javed Omar', 20.82, 188.8, 'OpenBat', 2001, 2007, 0.38145930361499225, 3.9215686274509802, 0]
['Ranjan Das', 7.57, 67.23, 'Fast', 2000, 2000, 0.06451612903225806, 1.1207970112079702, 0]
['Shahriar Hossain', 18.66, 387.22, 'OpenBat', 2000, 2004, 0.41596638655462187, 1.2, 0]
['Javed Omar', 20.82, 188.8, 'OpenBat', 2001, 2007, 0.38145930361499225, 1.2, 0]
['Manjural Islam', 3.74, 59.84, 'Fast', 2001, 2004, 0.21657754010695188, 1.0844228235532585, 0]
['Mohammad Sharif', 9.02, 47.37, 'PartFast', 2001, 2007, 0.4326241134751773, 1.3135235923563853, 0]
['Mushfiqur Rahman', 13.88, 49.23, 'PartFast', 2001, 2004, 0.3072847682119205, 1.2098937851446212, 0]
['Mohammad Sharif', 9.02, 47.37, 'Fast', 2001, 2007, 0.4326241134751773, 1.2, 0]
['Mushfiqur Rahman', 13.88, 49.23, 'Fast', 2001, 2004, 0.3072847682119205, 1.2, 0]
['Enamul Haque', 12.59, 46.96, 'Spin', 2001, 2003, 0.34615384615384615, 0.9241567557028659, 0]
['Mohammad Ashraful', 22.07, 55.43, 'PartSpin', 2001, 2013, 0.4607744107744108, 1.3969719451542877, 13]
['Khaled Mahmud', 12.61, 57.85, 'PartMed', 2001, 2003, 0.5846153846153846, 1.030595813204509, 9]
['Mohammad Ashraful', 22.07, 55.43, 'PartSpin', 2001, 2013, 0.4607744107744108, 1.2, 13]
['Khaled Mahmud', 12.61, 57.85, 'Med', 2001, 2003, 0.5846153846153846, 1.030595813204509, 9]
['Mashrafe Mortaza', 11.97, 45.65, 'Fast', 2001, 2009, 0.6720067453625632, 1.0465830147019224, 1]
['Sanwar Hossain', 18.05, 54.91, 'PartSpin', 2001, 2003, 0.46433378196500674, 1.4010666184579228, 0]
['Sanwar Hossain', 18.05, 54.91, 'PartSpin', 2001, 2003, 0.46433378196500674, 1.2, 0]
['Fahim Muntasir', 11.62, 49.62, 'Spin', 2002, 2002, 0.6046511627906976, 1.1914715719063544, 0]
['Alamgir Kabir', 5.13, 48.77, 'PartFast', 2002, 2004, 0.09302325581395349, 1.687860388742506, 0]
['Ehsanul Haque', 21.06, 137.56, 'Bat', 2002, 2002, 0.22580645161290322, 2.0066889632107023, 0]
['Hannan Sarkar', 19.06, 385.36, 'OpenBat', 2002, 2004, 0.4665257223396758, 1.4, 0]
['Talha Jubair', 6.1, 44.72, 'Med', 2002, 2004, 0.5360824742268041, 1.4099789691852846, 0]
['Alok Kapali', 16.96, 58.33, 'PartSpin', 2002, 2006, 0.48626144879267275, 1.2603772243258138, 0]
['Tapash Baisya', 10.6, 53.41, 'Fast', 2002, 2005, 0.49934980494148246, 1.2433816256472916, 0]
['Tushar Imran', 20.47, 141.09, 'Bat', 2002, 2007, 0.33840304182509506, 1.5686274509803921, 0]
['Rafiqul Islam', 14.84, 387.22, 'Bat', 2002, 2002, 0.1320754716981132, 1.4, 0]
['Anwar Hossain', 11.33, 387.22, 'OpenBat', 2002, 2002, 0.3181818181818182, 1.4, 0]
['Mohammad Salim', 10.22, 385.36, 'WK', 2003, 2003, 0.3828125, 1.4, 0]
['Anwar Hossain Monir', 4.32, 130.56, 'PartFast', 2003, 2005, 0.3013698630136986, 1.7297723687176019, 0]
['Alamgir Kabir', 5.13, 46.78, 'PartFast', 2002, 2004, 0.09302325581395349, 1.2, 0]
['Ehsanul Haque', 21.06, 53.66, 'Bat', 2002, 2002, 0.22580645161290322, 1.2, 0]
['Hannan Sarkar', 19.06, 385.36, 'OpenBat', 2002, 2004, 0.4665257223396758, 1.2, 0]
['Talha Jubair', 6.1, 44.72, 'Med', 2002, 2004, 0.5360824742268041, 1.2, 0]
['Alok Kapali', 16.96, 58.33, 'PartSpin', 2002, 2006, 0.48626144879267275, 1.2, 0]
['Tapash Baisya', 10.6, 53.41, 'Fast', 2002, 2005, 0.49934980494148246, 1.2, 0]
['Tushar Imran', 20.47, 61.79, 'Bat', 2002, 2007, 0.33840304182509506, 1.2, 0]
['Rafiqul Islam', 14.84, 387.22, 'Bat', 2002, 2002, 0.1320754716981132, 1.2, 0]
['Anwar Hossain', 11.33, 387.22, 'OpenBat', 2002, 2002, 0.3181818181818182, 1.2, 0]
['Mohammad Salim', 10.22, 385.36, 'WK', 2003, 2003, 0.3828125, 1.2, 0]
['Anwar Hossain Monir', 4.32, 132.77, 'Fast', 2003, 2005, 0.3013698630136986, 1.2, 0]
['Rajin Saleh', 23.91, 184.45, 'Bat', 2003, 2008, 0.35857950974230046, 1.1691824448128436, 0]
['Enamul Haque jnr', 7.01, 40.94, 'Spin', 2003, 2013, 0.2177121771217712, 0.9574710334204004, 0]
['Manjural Islam Rana', 21.08, 45.05, 'PartSpin', 2004, 2004, 0.3713872832369942, 1.0497657006727925, 0]
['Faisal Hossain', 20.97, 377.6, 'Bat', 2004, 2004, 0.25925925925925924, 1.4, 0]
['Tareq Aziz', 7.99, 50.17, 'PartFast', 2004, 2004, 0.14965986394557823, 1.4215686274509802, 0]
['Nafees Iqbal', 20.14, 372.44, 'OpenBat', 2004, 2006, 0.3888888888888889, 1.4, 0]
['Aftab Ahmed', 17.93, 49.27, 'Bat', 2004, 2010, 0.4961636828644501, 1.3122923588039868, 0]
['Faisal Hossain', 20.97, 377.6, 'Bat', 2004, 2004, 0.25925925925925924, 1.2, 0]
['Tareq Aziz', 7.99, 50.17, 'Fast', 2004, 2004, 0.14965986394557823, 1.2, 0]
['Nafees Iqbal', 20.14, 372.44, 'OpenBat', 2004, 2006, 0.3888888888888889, 1.2, 0]
['Aftab Ahmed', 17.93, 49.27, 'Bat', 2004, 2010, 0.4961636828644501, 1.2, 0]
['Nazmul Hossain', 5.71, 46.68, 'Fast', 2004, 2011, 0.7619047619047619, 1.123172673324649, 0]
['Mushfiqur Rahim', 32.35, 370.14, 'WK', 2005, 2018, 0.46436781609195404, 1.4, 34]
['Shahadat Hossain', 9.25, 52.76, 'Fast', 2005, 2015, 0.45943562610229277, 1.3167615641616863, 0]
['Shahriar Nafees', 24.22, 366.97, 'OpenBat', 2005, 2013, 0.558641975308642, 1.4, 0]
['Syed Rasel', 7.17, 43.1, 'Fast', 2005, 2007, 0.3894736842105263, 1.2456250951066283, 0]
['Mushfiqur Rahim', 32.35, 370.14, 'WK', 2005, 2018, 0.46436781609195404, 1.2, 34]
['Shahadat Hossain', 9.25, 52.76, 'Fast', 2005, 2015, 0.45943562610229277, 1.2, 0]
['Shahriar Nafees', 24.22, 366.97, 'OpenBat', 2005, 2013, 0.558641975308642, 1.2, 0]
['Syed Rasel', 7.17, 43.1, 'Fast', 2005, 2007, 0.3894736842105263, 1.2, 0]
['Abdur Razzak', 10.65, 42.07, 'Spin', 2006, 2018, 0.6492146596858639, 1.0603035776531355, 0]
['Shakib Al Hasan', 37.37, 29.96, 'Spin', 2007, 2017, 0.6210471747019181, 0.9617349787510192, 9]
['Mehrab Hossain jnr', 19.44, 67.86, 'PartSpin', 2007, 2009, 0.5105042016806722, 1.315081315081315, 0]
['Mehrab Hossain jnr', 19.44, 67.86, 'PartSpin', 2007, 2009, 0.5105042016806722, 1.2, 0]
['Junaid Siddique', 24.17, 184.56, 'Bat', 2008, 2012, 0.41392567278940623, 1.160337552742616, 0]
['Sajedul Islam', 8.97, 44.8, 'PartMed', 2008, 2013, 0.3829787234042553, 1.3348676639815877, 0]
['Tamim Iqbal', 35.9, 185.59, 'OpenBat', 2008, 2018, 0.5551685706324881, 1.2779552715654954, 1]
['Sajedul Islam', 8.97, 44.8, 'Med', 2008, 2013, 0.3829787234042553, 1.2, 0]
['Tamim Iqbal', 35.9, 185.59, 'OpenBat', 2008, 2018, 0.5551685706324881, 1.2, 1]
['Naeem Islam', 25.97, 136.06, 'PartSpin', 2008, 2012, 0.3747747747747748, 1.0022934768226524, 0]
['Mahbubul Alam', 3.39, 43.97, 'PartFast', 2008, 2008, 0.06944444444444445, 1.0189015981179523, 0]
['Mahbubul Alam', 3.39, 43.97, 'Fast', 2008, 2008, 0.06944444444444445, 1.0189015981179523, 0]
['Imrul Kayes', 24.34, 185.59, 'OpenBat', 2008, 2018, 0.48737300435413644, 0.9584664536741215, 0]
['Raqibul Hasan', 19.64, 184.56, 'Bat', 2008, 2011, 0.4023952095808383, 0.7685352622061482, 0]
['Raqibul Hasan', 19.64, 184.56, 'Bat', 2008, 2011, 0.4023952095808383, 0.8, 0]
['Mahmudullah', 29.0, 42.6, 'PartSpin', 2009, 2018, 0.519496855345912, 1.1174479953373553, 2]
['Rubel Hossain', 7.64, 69.53, 'PartFast', 2009, 2017, 0.4514285714285714, 1.2600716312296696, 0]
['Rubel Hossain', 7.64, 69.53, 'Fast', 2009, 2017, 0.4514285714285714, 1.2, 0]
['Shafiul Islam', 8.72, 40.54, 'Fast', 2010, 2017, 0.4806378132118451, 1.0413787766563118, 0]
['Jahurul Islam', 21.48, 370.14, 'OpenBat', 2010, 2013, 0.38470066518847007, 1.4, 0]
['Jahurul Islam', 21.48, 370.14, 'OpenBat', 2010, 2013, 0.38470066518847007, 1.2, 0]
['Robiul Islam', 6.01, 45.65, 'Fast', 2010, 2014, 0.4876847290640394, 1.019108280254777, 0]
['Elias Sunny', 11.57, 41.83, 'Spin', 2011, 2013, 0.26573426573426573, 1.1469396491279864, 0]
['Nasir Hossain', 30.63, 42.25, 'PartSpin', 2011, 2017, 0.5446009389671361, 0.9169743994025146, 0]
['Suhrawadi Shuvo', 17.91, 39.94, 'Spin', 2011, 2011, 0.375, 0.9363476030142696, 0]
['Nazimuddin', 20.33, 370.14, 'OpenBat', 2011, 2012, 0.390625, 1.4, 0]
['Nazimuddin', 20.33, 370.14, 'OpenBat', 2011, 2012, 0.390625, 1.2, 0]
['Sohag Gazi', 18.66, 48.49, 'Spin', 2012, 2014, 0.6143667296786389, 0.9727628431767187, 0]
['Abul Hasan', 17.57, 74.71, 'PartFast', 2012, 2013, 0.5935251798561151, 1.342646207295889, 0]
['Anamul Haque', 20.26, 370.71, 'OpenBat', 2013, 2014, 0.33181818181818185, 1.4, 0]
['Mominul Haque', 44.37, 77.66, 'Bat', 2013, 2018, 0.5609375, 1.2102777555431041, 0]
['Ziaur Rahman', 14.61, 38.85, 'Fast', 2013, 2013, 0.35, 0.7561235356762513, 0]
['Marshall Ayub', 20.7, 132.33, 'Bat', 2013, 2014, 0.42955326460481097, 1.6932907348242812, 0]
['Al-Amin Hossain', 5.8, 47.14, 'PartFast', 2013, 2014, 0.6601941747572816, 1.0020331106593088, 0]
['Shamsur Rahman', 21.21, 134.86, 'OpenBat', 2014, 2014, 0.4765625, 1.5974440894568693, 0]
['Abul Hasan', 17.57, 74.71, 'Fast', 2012, 2013, 0.5935251798561151, 1.2, 0]
['Anamul Haque', 20.26, 370.71, 'OpenBat', 2013, 2014, 0.33181818181818185, 1.2, 0]
['Mominul Haque', 44.37, 77.66, 'Bat', 2013, 2018, 0.5609375, 1.2, 0]
['Ziaur Rahman', 14.61, 38.85, 'Fast', 2013, 2013, 0.35, 0.8, 0]
['Marshall Ayub', 20.7, 56.03, 'Bat', 2013, 2014, 0.42955326460481097, 1.2, 0]
['Al-Amin Hossain', 5.8, 47.14, 'Fast', 2013, 2014, 0.6601941747572816, 1.0020331106593088, 0]
['Shamsur Rahman', 21.21, 54.53, 'OpenBat', 2014, 2014, 0.4765625, 1.2, 0]
['Shuvagata Hom', 25.31, 39.76, 'PartSpin', 2014, 2016, 0.5583524027459954, 1.1502091289325331, 0]
['Taijul Islam', 7.77, 35.35, 'Spin', 2014, 2018, 0.40613718411552346, 1.0615971858393831, 0]
['Jubair Hossain', 7.33, 45.77, 'Spin', 2014, 2015, 0.14772727272727273, 1.321745347304453, 0]
['Mohammad Shahid', 8.94, 54.05, 'PartFast', 2015, 2015, 0.7037037037037037, 0.8791208791208791, 0]
['Soumya Sarkar', 22.75, 61.07, 'OpenPartFast', 2015, 2017, 0.578838174273859, 1.2021169354838708, 0]
['Liton Das', 33.06, 379.03, 'WK', 2015, 2018, 0.6043956043956044, 1.4, 0]
['Jubair Hossain', 7.33, 45.77, 'Spin', 2014, 2015, 0.14772727272727273, 1.2, 0]
['Mohammad Shahid', 8.94, 54.05, 'Fast', 2015, 2015, 0.7037037037037037, 0.8791208791208791, 0]
['Soumya Sarkar', 22.75, 61.07, 'OpenPartFast', 2015, 2017, 0.578838174273859, 1.2, 0]
['Liton Das', 33.06, 379.03, 'WK', 2015, 2018, 0.6043956043956044, 1.2, 0]
['Mustafizur Rahman', 3.06, 31.53, 'Fast', 2015, 2018, 0.35185185185185186, 1.022025332251825, 0]
['Kamrul Islam Rabbi', 3.55, 50.23, 'PartFast', 2016, 2017, 0.1896551724137931, 1.3805064169268122, 0]
['Kamrul Islam Rabbi', 3.55, 50.23, 'Fast', 2016, 2017, 0.1896551724137931, 1.2, 0]
['Mehidy Hasan Miraz', 16.94, 37.14, 'Spin', 2016, 2018, 0.4601063829787234, 1.0854278394733925, 0]
['Sabbir Rahman', 20.31, 68.44, 'Bat', 2016, 2018, 0.5497142857142857, 1.3129689174705252, 0]
['Sabbir Rahman', 20.31, 65.69, 'Bat', 2016, 2018, 0.5497142857142857, 1.2, 0]
['Subashis Roy', 3.13, 38.85, 'Fast', 2017, 2017, 0.23333333333333334, 1.197738463717969, 0]
['Taskin Ahmed', 3.86, 56.86, 'PartFast', 2017, 2017, 0.40963855421686746, 1.4147909967845658, 0]
['Nazmul Hossain Shanto', 28.18, 188.74, 'Bat', 2017, 2017, 0.25862068965517243, 6.270096463022508, 0]
['Nurul Hasan', 29.34, 377.48, 'WK', 2017, 2017, 0.47, 1.4, 0]
['Mosaddek Hossain', 47.65, 61.17, 'PartSpin', 2017, 2018, 0.41434262948207173, 1.4469453376205788, 0]
['Sunzamul Islam', 12.58, 40.04, 'PartSpin', 2018, 2018, 0.42857142857142855, 1.0932475884244373, 0]
['Taskin Ahmed', 3.86, 56.86, 'Fast', 2017, 2017, 0.40963855421686746, 1.2, 0]
['Nazmul Hossain Shanto', 28.18, 188.74, 'Bat', 2017, 2017, 0.25862068965517243, 1.2, 0]
['Nurul Hasan', 29.34, 377.48, 'WK', 2017, 2017, 0.47, 1.2, 0]
['Mosaddek Hossain', 37.46, 59.72, 'PartSpin', 2017, 2018, 0.41434262948207173, 1.2, 0]
['Sunzamul Islam', 12.58, 40.04, 'Spin', 2018, 2018, 0.42857142857142855, 1.0932475884244373, 0]
92 changes: 49 additions & 43 deletions cricinfoscrape.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# THIS FILE SCRAPES DATA FROM CRICINFO, TURNS IT INTO READABLE DATA, AND EXPORTS IT TO PLAYERS.TXT

a = 500 #Number of players to read, starting with Cap 1 for the country
b = 4 # Cricinfo Team ID: 1 = England, 2 = Australia, 3 = South Africa, 4 = West Indies, 5 = New Zealand, 6 = India, 7 = Pakistan, 8 = Sri Lanka, 9 = Zimbabwe, 25 = Bangladesh
c = 'westindiesplayers.txt' #file to write to
a = 700 #Number of players to read, starting with Cap 1 for the country (doesn't need to be exact, program will end with an error but still execute)
b = 25 # Cricinfo Team ID: 1 = England, 2 = Australia, 3 = South Africa, 4 = West Indies, 5 = New Zealand, 6 = India, 7 = Pakistan, 8 = Sri Lanka, 9 = Zimbabwe, 25 = Bangladesh
c = 'bangladeshplayers.txt' #file to write to

from bs4 import BeautifulSoup
import requests
Expand Down Expand Up @@ -248,56 +248,63 @@ def guruscrape (x):
soup = BeautifulSoup(player, 'html.parser')

try:
CaptainGames = soup.find(text='is captain')
print (CaptainGames.parent.parent.parent.contents[5])
if 'Span' in soup.prettify():
CaptainGames = CaptainGames.parent.parent.parent.contents[5]
else:
CaptainGames = CaptainGames.parent.parent.parent.contents[4]
CaptainGames = str(CaptainGames)
CaptainGames = CaptainGames[4:-5]
print (CaptainGames)
ReducedStats.append(int(CaptainGames))
CaptainGames = ''
while CaptainGames == '':
CaptainGames = soup.find(text='is captain')
if 'Span' in soup.prettify():
CaptainGames = CaptainGames.parent.parent.parent.contents[5]
else:
CaptainGames = CaptainGames.parent.parent.parent.contents[4]
CaptainGames = str(CaptainGames)
CaptainGames = CaptainGames[4:-5]
ReducedStats.append(int(CaptainGames))
except:
ReducedStats.append(0)

try:
WKGames = soup.find(text='is designated keeper')
if 'Span' in soup.prettify():
WKGames = WKGames.parent.parent.next_sibling.next_sibling.next_sibling.next_sibling
else:
WKGames = WKGames.parent.parent.parent.next_sibling.next_sibling
WKGames = str(WKGames)
WKGames = WKGames[4:-5]
ReducedStats.append(int(WKGames))
WKGames = ''
while WKGames == '':
WKGames = soup.find(text='is designated keeper')
if 'Span' in soup.prettify():
WKGames = WKGames.parent.parent.parent.contents[5]
else:
WKGames = WKGames.parent.parent.parent.contents[3]
WKGames = str(WKGames)
WKGames = WKGames[4:-5]
ReducedStats.append(int(WKGames))
except:
ReducedStats.append(0)
try:
Bat1Inns = soup.find(text='1st position')
if 'Span' in soup.prettify():
Bat1Inns = Bat1Inns.parent.parent.parent.contents[7]
else:
Bat1Inns = Bat1Inns.parent.parent.parent.contents[5]
#print (Bat1Inns.parent.parent.parent.contents)
Bat1Inns = str(Bat1Inns)
Bat1Inns = Bat1Inns[4:-5]
Bat1Inns = int(Bat1Inns)
Bat1Inns = ''
while Bat1Inns == '':
Bat1Inns = soup.find(text='1st position')
if 'Span' in soup.prettify():
Bat1Inns = Bat1Inns.parent.parent.parent.contents[7]
else:
Bat1Inns = Bat1Inns.parent.parent.parent.contents[5]
#print (Bat1Inns.parent.parent.parent.contents)
Bat1Inns = str(Bat1Inns)
Bat1Inns = Bat1Inns[4:-5]
Bat1Inns = int(Bat1Inns)
except:
Bat1Inns = 0
try:
Bat2Inns = soup.find(text='2nd position')
if 'Span' in soup.prettify():
Bat2Inns = Bat2Inns.parent.parent.parent.contents[7]
else:
Bat2Inns = Bat2Inns.parent.parent.parent.contents[5]
#print (Bat2Inns)
Bat2Inns = str(Bat2Inns)
Bat2Inns = Bat2Inns[4:-5]
Bat2Inns = int(Bat2Inns)
Bat2Inns = ''
while Bat2Inns == '':

Bat2Inns = soup.find(text='2nd position')
if 'Span' in soup.prettify():
Bat2Inns = Bat2Inns.parent.parent.parent.contents[7]
else:
Bat2Inns = Bat2Inns.parent.parent.parent.contents[5]
#print (Bat2Inns)
Bat2Inns = str(Bat2Inns)
Bat2Inns = Bat2Inns[4:-5]
Bat2Inns = int(Bat2Inns)
except:
Bat2Inns = 0
OpenInns = Bat1Inns + Bat2Inns
#print (Bat1Inns, Bat2Inns)
print (CaptainGames, WKGames, OpenInns)
ReducedStats.append(OpenInns)


Expand All @@ -312,7 +319,7 @@ def PlayerList (x, y):
f.write(str(Format))
f.write('\n')
f.close()

f = open(c,'w')
for i in range (1, (x+1)):
ReducedStats = []
HTMLString = newsoup.find(text=i)
Expand All @@ -327,10 +334,9 @@ def PlayerList (x, y):
scrape (PlayerNo)
guruscrape (PlayerNo)
print (ReducedStats)
f = open(c,'a')
f.write(str(ReducedStats))
f.write('\n')
f.close()
f.close()


#print ('[Name, BowlStyle, Games, Inns, NO, RunsSc, Balls, RunsAg, Wic, FirstTest, LastTest, FCGames, FCRuns, FCBatAve, FCInns, FCWickets, FCBowlAve]')
Expand Down
55 changes: 55 additions & 0 deletions cricinfoyearscrape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#scrapes Cricinfo Statsguru to find the average runs/wicket for the 5 years either side of the input year
#runs through every year from 1918 to 2017 (the range used by the loop at the bottom of this file)
#appends one "year, average" line per year to ERdata.txt

from bs4 import BeautifulSoup
import requests
import re

#f = open('eradata.txt','w')
#f.close()

def _choose_average(candidates, low=2, high=5):
    """Pick the figure to record from a list of candidate cell strings.

    Candidates are examined in order; the last one whose numeric value lies
    strictly between ``low`` and ``high`` wins.  If none qualifies, the first
    candidate is returned unchanged.  Candidates that are not parseable as a
    float are skipped instead of aborting the whole scrape.
    """
    chosen = candidates[0]
    for text in candidates:
        try:
            value = float(text)
        except ValueError:
            # A neighbouring column that is not a plain number can never be
            # the figure we are looking for, so just ignore it.
            continue
        if low < value < high:
            chosen = text
    return chosen


def scrape (Year):
    """Fetch the overall Test bowling figures around ``Year`` and log one value.

    Requests the Statsguru overall bowling summary for the span
    1 Jan (Year - 5) to 31 Dec (Year + 5), reads four neighbouring cells of
    the 'Overall figures' table row, keeps the one whose value falls strictly
    between 2 and 5 (falling back to the first cell probed), appends
    ``"<Year>, <value>"`` as a line to ``ERdata.txt`` and prints the pair.

    NOTE(review): the file header calls the recorded figure "average
    runs/wicket", but a 2-5 window looks more like a runs-per-over figure;
    confirm which column is actually intended.

    Raises whatever ``requests``/``BeautifulSoup`` raise on network failure,
    and ``AttributeError``/``IndexError`` if the page layout does not contain
    the expected 'Overall figures' table.
    """
    url = str('http://stats.espncricinfo.com/ci/engine/stats/index.html?class=1;filter=advanced;groupby=overall;orderby=runs;spanmax1=31+Dec+' + str(Year+5) + ';spanmin1=01+Jan+' +str(Year-5) +';spanval1=span;template=results;type=bowling')
    r = requests.get(url)
    print (url)

    soup = BeautifulSoup(r.content, 'html.parser')

    # Locate the summary row once instead of re-walking the tree per cell.
    row = soup.find(text = 'Overall figures').parent.parent.contents[5].contents[1]

    # The wanted column is not always at the same index, so several
    # neighbouring cells are probed; index 25 is the default.  The [4:-5]
    # slice drops the first 4 and last 5 characters of each cell's markup
    # (presumably the surrounding <td> and </td> tags -- TODO confirm).
    candidates = [str(row.contents[index])[4:-5] for index in (25, 23, 27, 29)]
    Average = _choose_average(candidates)

    # Context manager guarantees the file is closed even if the write fails.
    with open('ERdata.txt','a') as f:
        f.write (str(str(Year) + ', '+ str(Average)))
        f.write ('\n')
    print (Year, Average)

#scrape (2018)

# Scrape every year from 1918 through 2017 (range() stops before 2018),
# appending one result line per year.
for year in range(1918, 2018):
    scrape(year)
Loading

0 comments on commit 14d3336

Please sign in to comment.