Skip to content

Commit

Permalink
update chi_police_district_councils spider and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
rhetr committed Jun 30, 2023
1 parent 469a6c9 commit 3d3f78b
Show file tree
Hide file tree
Showing 26 changed files with 97 additions and 34,710 deletions.
12 changes: 6 additions & 6 deletions city_scrapers/spiders/chi_police_district_councils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def decode_value(value):


def generate_start_urls(url):
return [f"{url}/DC001.html"]
return [
f"{url}/DC{district:03}.html"
for district in range(1, 26)
Expand All @@ -43,7 +44,9 @@ def parse(self, response):
if "notices-and-agendas" in url:
url_text = item.css("p > a::text").get().strip()
yield response.follow(
url, callback=self._parse_meeting, meta={"url_text": url_text}
url,
callback=self._parse_meeting,
meta={"url_text": url_text, "source": response.url},
)

def _parse_meeting(self, response):
Expand All @@ -64,7 +67,7 @@ def _parse_meeting(self, response):
time_notes="",
location={},
links=self._parse_links(response),
source=self._parse_source(response),
source=response.meta["source"],
)
if not pdf == {}:
meeting["description"] = self._parse_description(pdf)
Expand Down Expand Up @@ -111,7 +114,7 @@ def _parse_title(self, url, item=None):
district = int(urllib.parse.urlparse(url).path.split("/")[7].split("-")[-1])

meeting_type = self._parse_meeting_type(item)
subtitle = f"{district} {meeting_type}" if meeting_type else district
subtitle = f"{district:03} {meeting_type}" if meeting_type else f"{district:03}"

return f"Chicago Police District Council {subtitle} Meeting"

Expand Down Expand Up @@ -188,6 +191,3 @@ def _parse_location(self, item):

def _parse_links(self, response):
return [{"href": response.url, "title": "agenda"}]

def _parse_source(self, response):
return response.url
Loading

0 comments on commit 3d3f78b

Please sign in to comment.