Skip to content

Commit b26b3db

Browse files
committed
Update
1 parent 162ced4 commit b26b3db

File tree

5 files changed

+1189
-71
lines changed

5 files changed

+1189
-71
lines changed

basketball_reference_web_scraper/contracts/__init__.py

Whitespace-only changes.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import dataclasses
2+
import enum
3+
from typing import Optional, Dict
4+
5+
6+
@dataclasses.dataclass(frozen=True)
class PlayerData:
    """Immutable pairing of a player's name with the player's identifier."""

    # Name of the player.
    name: str
    # Identifier string associated with the player.
    identifier: str
10+
11+
12+
class SalaryOption(enum.Enum):
    """Kinds of option (player or team) that a season's salary may carry."""

    PLAYER = "player"
    TEAM = "team"
15+
16+
17+
@dataclasses.dataclass(frozen=True)
class SeasonSalaryData:
    """Immutable record of the salary recorded for a single season."""

    # Salary value, kept as a string; this class performs no numeric parsing.
    value: str
    # Identifier of the season the salary belongs to.
    season_identifier: str
    # Option attached to this season's salary, or None when there is none.
    option: Optional[SalaryOption]
22+
23+
24+
@dataclasses.dataclass(frozen=True)
class RowData:
    """Immutable record of one contracts row: player, team and per-season salaries."""

    # The player the row describes.
    player: PlayerData
    # Identifier of the player's team.
    team_id: str
    # Salary entries keyed by a string (presumably a season identifier --
    # TODO confirm against the code that builds this mapping); a value of
    # None marks a key with no salary entry.
    salary_by_season: Dict[str, Optional[SeasonSalaryData]]
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import enum
2+
from typing import Dict, Any, Optional
3+
4+
from lxml.etree import Element
5+
6+
from .data import PlayerData
7+
8+
9+
class Column(enum.Enum):
    """Columns of the contracts table.

    Each member's value is the ``data-stat`` attribute value that
    ``SingleCellFinder`` uses to locate the matching ``<td>`` in a row.
    """

    PLAYER = "player"
    TEAM = "team_id"
    FIRST_SEASON_SALARY = "y1"
    SECOND_SEASON_SALARY = "y2"
    THIRD_SEASON_SALARY = "y3"
    FOURTH_SEASON_SALARY = "y4"
    FIFTH_SEASON_SALARY = "y5"
    SIXTH_SEASON_SALARY = "y6"
18+
19+
20+
class SingleCellFinder:
    """Finds the one ``<td>`` in a row whose ``data-stat`` matches a column."""

    def __init__(self, column: Column):
        """
        :param column: column whose ``value`` is matched against ``data-stat``
        """
        self.column = column

    def find(self, row: Element) -> Optional[Element]:
        """Return the matching cell, or None unless exactly one cell matches.

        :param row: element exposing ``xpath``; only its direct ``td``
            children are searched
        """
        matching_cells = row.xpath(f'./td[@data-stat="{self.column.value}"]')
        # Zero matches and ambiguous (multiple) matches are both treated as
        # "not found".
        if len(matching_cells) == 1:
            return matching_cells[0]
        return None
28+
29+
30+
class SingleCellValueReader:
    """Locates a cell in a row and converts it to a value with a cell reader."""

    def __init__(self, cell_finder: SingleCellFinder, cell_reader):
        """
        :param cell_finder: object whose ``find(row=...)`` returns a cell or None
        :param cell_reader: object whose ``read(cell=...)`` converts a cell
        """
        self.cell_finder = cell_finder
        self.cell_reader = cell_reader

    def read(self, row: Element):
        """Return the value read from the row's cell, or None if no cell is found.

        :param row: row element handed to the cell finder
        """
        cell = self.cell_finder.find(row=row)
        # Compare against None explicitly: an lxml element's truth value
        # reflects whether it has children, so a found cell that holds only
        # text would be falsy and wrongly skipped by a bare ``if cell:``.
        if cell is not None:
            return self.cell_reader.read(cell=cell)
        return None
39+
40+
41+
class RowDataReader:
    """Reads one value per configured column from a table row."""

    def __init__(self, cell_readers_by_column: Dict[Column, SingleCellValueReader]):
        """
        :param cell_readers_by_column: reader to apply for each column key
        """
        self.cell_readers_by_column = cell_readers_by_column

    def read(self, row: Element) -> Dict[Column, Optional[Any]]:
        """Return a dict mapping each configured column to its reader's result.

        Every reader is called as ``reader.read(row=row)``; whatever it
        returns (including None) is stored under that reader's column key.

        :param row: row element passed unchanged to every reader
        """
        return {
            column: reader.read(row=row)
            for column, reader in self.cell_readers_by_column.items()
        }
47+
48+
49+
class PlayerDataCellReader:
    """Builds a ``PlayerData`` from a table cell."""

    def __init__(self, player_identifier_attribute_name):
        """
        :param player_identifier_attribute_name: name of the cell attribute
            whose value is used as the player's identifier
        """
        self.player_identifier_attribute_name = player_identifier_attribute_name

    def read(self, cell: Element) -> PlayerData:
        """Return a ``PlayerData`` built from the cell.

        The name is the cell's text content; the identifier is the value of
        the configured attribute as returned by ``cell.get``.

        :param cell: cell element exposing ``text_content()`` and ``get()``
        """
        return PlayerData(
            name=cell.text_content(),
            identifier=cell.get(self.player_identifier_attribute_name),
        )

basketball_reference_web_scraper/html.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import re
2+
from typing import Dict, Set, Tuple
23

34
from lxml import html
45
from lxml.html import HtmlComment
56

7+
from basketball_reference_web_scraper.contracts.readers import RowDataReader, Column, PlayerDataCellReader, \
8+
SingleCellValueReader, SingleCellFinder
9+
610

711
class BasicBoxScoreRow:
812
def __init__(self, html):
@@ -1231,10 +1235,37 @@ def losses(self):
12311235
return None
12321236

12331237

1238+
class PlayerContractsTableReader:
    """Reads data rows and column headers from a player contracts table."""

    def __init__(self, row_reader, html=None):
        """
        :param row_reader: object whose ``read(row=...)`` is applied to each
            data row yielded by ``rows``
        :param html: optional element searched by
            ``column_names_by_identifier``; defaults to None so existing
            callers that pass only ``row_reader`` keep working
        """
        self.row_reader = row_reader
        self.html = html

    @property
    def column_names_by_identifier(self) -> Dict[str, str]:
        """Map each column identifier to the text of its header cell.

        Only columns with exactly one matching ``<th scope="col">`` header
        are included; missing or ambiguous headers are left out.

        :raises ValueError: if no ``html`` element was supplied
        """
        if self.html is None:
            raise ValueError("html is required to look up column names")

        names = {}
        for column in Column:
            # Interpolate the enum's value (the data-stat string), not the
            # enum member itself, and keep the predicate well-formed.
            headers = self.html.xpath(
                f'.//th[@scope="col"][@data-stat="{column.value}"]'
            )
            if len(headers) == 1:
                names[column.value] = headers[0].text_content()
        return names

    def rows(self, table):
        """Yield the row reader's result for every body row without a class.

        :param table: table element exposing ``xpath``
        """
        # ``not(...)`` is an XPath function call; rows carrying a ``class``
        # attribute are skipped.
        for row_html in table.xpath('./tbody/tr[not(@class)]'):
            yield self.row_reader.read(row=row_html)
1254+
1255+
12341256
class PlayerContractsRow:
12351257
def __init__(self, html):
12361258
self.html = html
12371259

1260+
def retrieve_cell(self, statistic_identifier: str, attributes: Set[str]) -> Tuple[str, Dict[str, str]]:
1261+
matching_cells = self.html.xpath(f'.//td[@data-stat="{statistic_identifier}')
1262+
1263+
if 1 == len(matching_cells):
1264+
cell = matching_cells[0]
1265+
return cell.text_content(), dict(map(lambda attribute: [attribute, cell.get(attribute)], attributes))
1266+
1267+
raise ValueError(f"expected exactly one matching cell for statistic identifier {statistic_identifier}")
1268+
12381269
@property
12391270
def player_name(self):
12401271
matching_cells = self.html.xpath('.//td[@data-stat="player"]')

0 commit comments

Comments
 (0)