Skip to content

Commit

Permalink
Check
Browse files: browse the repository at this point in the history
  • Loading branch information
nikhil25803 committed May 16, 2024
1 parent dbf3e6b commit 8f3a428
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/scrape_up/espncricinfo/espncricinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,4 @@ def get_livescores(self):
live_scores.append(match_details)
return live_scores
except:
return live_scores
return live_scores
54 changes: 31 additions & 23 deletions src/scrape_up/indeed/Indeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ class Indeed:
"""

headers: dict = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}

def __init__(
self, search_query: str, location_query: str = "", min_jobs: int = 100
self, search_query: str, location_query: str = "", min_jobs: int = 100
):
self.search_query = search_query
self.location_query = location_query
Expand Down Expand Up @@ -55,12 +55,12 @@ def get_jobs(self):
]
```
"""
search = self.search_query.strip().replace(' ', '+').lower()
location = self.location_query.strip().replace(' ', ', ').lower()
search = self.search_query.strip().replace(" ", "+").lower()
location = self.location_query.strip().replace(" ", ", ").lower()

base_url: str = f'https://in.indeed.com/m/jobs?q={search}'
if location != '':
base_url = f'{base_url}&l={location}'
base_url: str = f"https://in.indeed.com/m/jobs?q={search}"
if location != "":
base_url = f"{base_url}&l={location}"

job_listings = []
start_index = 0
Expand All @@ -69,8 +69,8 @@ def get_jobs(self):
response = requests.get(url, headers=Indeed.headers)

if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
job_cards = soup.find_all('div', class_='job_seen_beacon')
soup = BeautifulSoup(response.content, "html.parser")
job_cards = soup.find_all("div", class_="job_seen_beacon")
if not job_cards:
break
for job_card in job_cards:
Expand All @@ -79,7 +79,7 @@ def get_jobs(self):
break
start_index += len(job_cards)
else:
print(f'Failed to fetch URL: {url}')
print(f"Failed to fetch URL: {url}")
break

return job_listings
Expand All @@ -105,34 +105,42 @@ def scrape_job_info(self, job_card):
}
```
"""
job_title_element = job_card.find('h2', class_='jobTitle')
job_title_element = job_card.find("h2", class_="jobTitle")
job_title: str = job_title_element.text.strip() if job_title_element else None

job_post_state_element = job_card.find('span', class_='css-92r8pb')
company_name: str = job_post_state_element.text.strip() if job_post_state_element else None
job_post_state_element = job_card.find("span", class_="css-92r8pb")
company_name: str = (
job_post_state_element.text.strip() if job_post_state_element else None
)

location_element = job_card.find('div', class_='css-1p0sjhy')
location_element = job_card.find("div", class_="css-1p0sjhy")
location: str = location_element.text.strip() if location_element else None

link_element = job_card.find('a', class_='jcs-JobTitle')
link: str = 'https://in.indeed.com' + link_element['href'] if link_element else None
link_element = job_card.find("a", class_="jcs-JobTitle")
link: str = (
"https://in.indeed.com" + link_element["href"] if link_element else None
)

meta_data_element = job_card.find('div', class_='jobMetaDataGroup')
meta_data_element = job_card.find("div", class_="jobMetaDataGroup")
if meta_data_element:
meta_data_items = meta_data_element.find_all('div', class_='css-1cvo3fd')
meta_data_items = meta_data_element.find_all("div", class_="css-1cvo3fd")
meta_data: list = [item.text.strip() for item in meta_data_items]
else:
meta_data: list = []

job_description_element = job_card.find('div', class_='css-9446fg')
job_description_element = job_card.find("div", class_="css-9446fg")
if job_description_element:
li_elements = job_description_element.find_all('li')
li_elements = job_description_element.find_all("li")
job_description: list = [li.text.strip() for li in li_elements]
else:
job_description: list = []

job_post_state_element = job_card.find('span', class_='css-qvloho')
job_post_state: str = job_post_state_element.contents[-1].text.strip() if job_post_state_element else None
job_post_state_element = job_card.find("span", class_="css-qvloho")
job_post_state: str = (
job_post_state_element.contents[-1].text.strip()
if job_post_state_element
else None
)

job = {
"title": job_title,
Expand All @@ -141,6 +149,6 @@ def scrape_job_info(self, job_card):
"link": link,
"tags": meta_data,
"description": job_description,
"posted_day": job_post_state
"posted_day": job_post_state,
}
return job
1 change: 0 additions & 1 deletion src/test/espncricinfo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@


class ESPNTest(unittest.TestCase):

def test_connection(self):
instance = Espncricinfo()
self.assertTrue(
Expand Down

0 comments on commit 8f3a428

Please sign in to comment.