Skip to content

Commit

Permalink
Merge pull request #923 from msabry1/wuzzuf-feature
Browse files Browse the repository at this point in the history
Wuzzuf feature Added
  • Loading branch information
nikhil25803 authored May 14, 2024
2 parents 12ee113 + 9caa54c commit a314fa9
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 0 deletions.
14 changes: 14 additions & 0 deletions dev-documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -1653,3 +1653,17 @@ geeksforgeeks = Geeksforgeeks(user="username")
| `.get_profile()` | Returns the user data in json format. |

---

## Wuzzuf

```python
from scrape_up import wuzzuf
jobs = wuzzuf.Jobs()
```

The `Jobs` class provides methods for configuring scraping parameters and fetching job listings:

| Methods | Details |
| --------------- | ---------------------------------------------------------------------------------------- |
| `.filter_job()` | Apply filters such as job title, country, city, and range of years of experience. |
| `.fetch_jobs()` | Fetch job listings from the website based on the applied filters, across multiple pages. |
3 changes: 3 additions & 0 deletions src/scrape_up/wuzzuf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export the scraper class so callers can write `from scrape_up.wuzzuf import Jobs`.
from .wuzzuf import Jobs

# Public API of this package.
__all__ = ["Jobs"]
131 changes: 131 additions & 0 deletions src/scrape_up/wuzzuf/wuzzuf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import requests
from bs4 import BeautifulSoup
from time import sleep


class Jobs:
    """
    Create an instance of the class `Jobs`
    ```python
    scraper = Jobs()
    ```
    | Methods                       | Details                                                                                              |
    | ----------------------------- | -------------------------------------------------------------------------------------------------- |
    | `.filter_job()`               | Apply filters to the job search using parameters like title, country, city, minimum and maximum years of experience. |
    | `.fetch_jobs()`               | Fetch job listings based on the applied filters, with an optional maximum number of pages to scrape. |
    """

    def __init__(self):
        # Base search URL; filter_job() appends query parameters onto it.
        self.url = "https://wuzzuf.net/search/jobs/?"

    def filter_job(
        self,
        title=None,
        country=None,
        city=None,
        min_years_of_experience=None,
        max_years_of_experience=None,
    ):
        """
        Apply filters to the job search by appending query parameters to `self.url`.
        Parameters:
        - `title` (str): Job title to search for.
        - `country` (str): Country to search for jobs in.
        - `city` (str): City to search for jobs in.
        - `min_years_of_experience` (int): Minimum years of experience required.
        - `max_years_of_experience` (int): Maximum years of experience allowed.
        Example:
        ```python
        scraper.filter_job(title="software engineer", country="Egypt", city="Cairo", min_years_of_experience=2, max_years_of_experience=5)
        ```
        NOTE: filters accumulate on `self.url`, so calling this method twice on
        the same instance appends the parameters twice. Use a fresh `Jobs()`
        instance per search.
        """
        if title:
            # Wuzzuf expects spaces in the free-text query encoded as '+'.
            title = title.replace(" ", "+")
            self.url += f"q={title}"
        if country:
            self.url += f"&filters[country][0]={country.strip().capitalize()}"
        if city:
            self.url += f"&filters[city][0]={city.strip().capitalize()}"
        # Compare against None explicitly: a legitimate value of 0 years is
        # falsy and would otherwise be silently dropped from the query.
        if min_years_of_experience is not None:
            self.url += (
                f"&filters[years_of_experience_min][0]={min_years_of_experience}"
            )
        if max_years_of_experience is not None:
            self.url += (
                f"&filters[years_of_experience_max][0]={max_years_of_experience}"
            )

    def __fetch_page_jobs(self, page_num):
        """
        Scrape a single results page.

        Returns a list of job dicts, or None when the server does not answer
        with HTTP 200. (The previous code did `raise None`, which itself
        raises `TypeError: exceptions must derive from BaseException` and is
        not caught by the `except requests.RequestException` in fetch_jobs.)
        """
        # timeout guards against the request hanging indefinitely.
        response = requests.get(self.url + f"&start={page_num}", timeout=10)
        if response.status_code != 200:
            # Signal "request failed / no more data" to fetch_jobs().
            return None
        parsed_html = BeautifulSoup(response.content, "lxml")
        jobs_data = parsed_html.find_all("div", {"class": "css-1gatmva e1v1l3u10"})
        job_sub_list = []
        for job_data in jobs_data:
            job = {
                "name": self.__get_job_name(job_data),
                "url": self.__get_job_url(job_data),
                "company": self.__get_job_company(job_data),
                "location": self.__get_job_location(job_data),
                "published_time": self.__get_published_time(job_data),
                "properties": self.__get_job_properties(job_data),
            }
            job_sub_list.append(job)
        return job_sub_list

    def fetch_jobs(self, max_page_number=50):
        """
        Fetch job listings based on the applied filters.
        Parameters:
        - `max_page_number` (int): Maximum number of pages to scrape (default is 50).
        Returns:
        - `list`: A list of dictionaries representing the fetched job listings,
          or `None` if a network error occurred.
        Example:
        ```python
        jobs = scraper.fetch_jobs(max_page_number=5)
        ```
        """
        job_list = []
        try:
            for page_num in range(max_page_number):
                job_sub_list = self.__fetch_page_jobs(page_num)
                if not job_sub_list:
                    # Empty page or failed request: no more results to collect.
                    break
                job_list.extend(job_sub_list)
                sleep(1)  # be polite to the server between page requests
        except requests.RequestException:
            # Network-level failure (DNS, connection, timeout): give up.
            return None
        return job_list

    def __get_job_name(self, job_data):
        # Job title text inside the <h2 class="css-m604qf"><a> element.
        return job_data.find("h2", {"class": "css-m604qf"}).find("a").text.strip()

    def __get_job_url(self, job_data):
        # Link target of the job title anchor.
        return job_data.find("h2", {"class": "css-m604qf"}).find("a")["href"]

    def __get_job_company(self, job_data):
        # Company anchor text ends with a trailing separator char; drop it.
        return job_data.find("div", {"class": "css-d7j1kk"}).find("a").text[:-1].strip()

    def __get_job_location(self, job_data):
        # Location span may be absent; return None in that case.
        data = job_data.find("span", {"class": "css-5wys0k"})
        return data.text.strip() if data else None

    def __get_published_time(self, job_data):
        # Wuzzuf uses two different classes for the published-time element
        # (recent vs. older postings); try both.
        return (
            job_data.find("div", {"class": "css-4c4ojb"})
            or job_data.find("div", {"class": "css-do6t5g"})
        ).text.strip()

    def __get_job_properties(self, job_data):
        # Join all property tags (job type, seniority, ...) into one string.
        job_properties_string = " ,".join(
            [prop.text for prop in job_data.find_all("span", {"class": "eoyjyou0"})]
        )
        return job_properties_string if job_properties_string else None

0 comments on commit a314fa9

Please sign in to comment.