Merge pull request #940 from AKASH722/main

Feat: FlexJobs Scrapper added
Clueless-Community · May 16, 2024 · f6a9ea0 · f6a9ea0
2 parents cc699bf + f5e9fe5
commit f6a9ea0
Show file tree

Hide file tree

Showing 4 changed files with 218 additions and 0 deletions.
diff --git a/dev-documentation.md b/dev-documentation.md
@@ -420,6 +420,29 @@ infosys = StockPrice('infosys','nse')
 
 ---
 
+### Flex Jobs
+
+```python
+    flex_jobs = FlexJobs(search_query, location_query, min_jobs)
+```
+
+- Attributes
+
+| Attribute        | Description                                                       |
+| ---------------- | ----------------------------------------------------------------- |
+| `search_query`   | The search query to filter job listings.                          |
+| `location_query` | The location query to filter job listings (defaults to '').       |
+| `min_jobs`       | The maximum number of job listings to retrieve (defaults to 100). |
+
+- Methods
+
+| Method                                 | Description                                                                                                                               |
+| -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| `get_jobs() -> list`                   | Retrieves job listings from FlexJobs website based on search and location queries. Returns a list of dictionaries containing job details. |
+| `scrape_job_info(job_listing) -> dict` | Extracts job details from a job listing HTML element.                                                                                     |
+
+---
+
 ## IMDb
 
 Create an instance of the `IMDB` class.

diff --git a/src/scrape_up/flexjobs/__init__.py b/src/scrape_up/flexjobs/__init__.py
@@ -0,0 +1,3 @@
+from .flexjobs import FlexJobs
+
+__all__ = ["FlexJobs"]
diff --git a/src/scrape_up/flexjobs/flexjobs.py b/src/scrape_up/flexjobs/flexjobs.py
@@ -0,0 +1,166 @@
+import requests
+from bs4 import BeautifulSoup
+
+
+class FlexJobs:
+    """
+    ```python
+    flex_jobs = FlexJobs(search_query, location_query, min_jobs)
+    ```
+
+    Attributes:
+    | Attribute         | Optional | Details                                        |
+    |-------------------|----------|------------------------------------------------|
+    | `search_query`    | No       | The search query for job listings              |
+    | `location_query`  | Yes      | The location query for job listings            |
+    | `min_jobs`        | Yes      | The minimum number of job listings to retrieve |
+
+    Methods:
+    | Method                          | Details                                                                  |
+    |---------------------------------|--------------------------------------------------------------------------|
+    | `.get_jobs()`                   | Returns a list of job listings based on the search and location queries. |
+    | `.scrape_job_info(job_listing)` | Extracts job details from a given job listing HTML element               |
+    """
+
+    def __init__(
+        self, search_query: str, location_query: str = "", min_jobs: int = 100
+    ):
+        self.search_query = search_query
+        self.location_query = location_query
+        self.min_jobs = min_jobs
+
+    def get_jobs(self):
+        """
+        Retrieves job listings based on the search and location queries.
+
+        Returns:
+        list: A list of dictionaries, each containing details of a job listing.
+        ```js
+        [
+            {
+                'title': 'Contract Administrator',
+                'location': 'Springboro, OH',
+                'link': 'https://www.flexjobs.com//HostedJob.aspx?id=2061188',
+                'posted_day': '13 days ago',
+                'remote': 'Hybrid Remote Work',
+                'schedule': 'Full-Time',
+                'job_type': 'Freelance',
+                'salary': 'A range of 70,000.00 - 90,000.00 USD Annually',
+                'description': 'Coordinate and administer construction contracts, prepare bid documentation, manage purchase orders and subcontracts, manage certificates of insurance and bonds, and liaise with internal teams, clients, and subcontractors.',
+            },
+            {...},
+            {...},
+        ]
+        ```
+        """
+        # Formatting search and location queries for URL
+        search = self.search_query.strip().replace(" ", "%20")
+        location = self.location_query.strip().replace(" ", "%2C%20")
+
+        # Constructing base URL
+        base_url: str = f"https://www.flexjobs.com/search?searchkeyword={search}"
+        if location != "":
+            base_url = f"{base_url}&joblocations={location}"
+
+        job_listings = []
+        page = 1
+        # Loop until maximum job listings are retrieved or no more listings available
+        while len(job_listings) < self.min_jobs:
+            url = f"{base_url}&page={page}"
+            response = requests.get(url)
+            if response.status_code == 200:
+                # Parsing HTML content
+                soup = BeautifulSoup(response.content, "html.parser")
+                job_cards = soup.find_all("div", class_="sc-jv5lm6-0")
+                if not job_cards:
+                    break
+                # Extracting job information from each listing
+                for job_card in job_cards:
+                    job_listings.append(self.scrape_job_info(job_card))
+                    if len(job_listings) >= self.min_jobs:
+                        break
+                page += 1
+            else:
+                print(f"Failed to fetch URL: {url}")
+                break
+        return job_listings
+
+    def scrape_job_info(self, job_listing):
+        """
+        Extracts job details from a given job listing HTML element.
+
+        Args:
+        job_listing: BeautifulSoup HTML element representing a job listing.
+
+        Returns:
+        dict: A dictionary containing details of the extracted job.
+        ```js
+        {
+            'title': 'Contract Administrator',
+            'location': 'Springboro, OH',
+            'link': 'https://www.flexjobs.com//HostedJob.aspx?id=2061188',
+            'posted_day': '13 days ago',
+            'remote': 'Hybrid Remote Work',
+            'schedule': 'Full-Time',
+            'job_type': 'Freelance',
+            'salary': 'A range of 70,000.00 - 90,000.00 USD Annually',
+            'description': 'Coordinate and administer construction contracts, prepare bid documentation, manage purchase orders and subcontracts, manage certificates of insurance and bonds, and liaise with internal teams, clients, and subcontractors.',
+        }
+        ```
+        """
+        # Extracting job details from HTML elements
+        job_title_element = job_listing.find(
+            "a", id=lambda x: x and x.startswith("job-name-")
+        )
+        job_title = job_title_element.text.strip() if job_title_element else None
+
+        link: str = (
+            "https://www.flexjobs.com/" + job_title_element["href"]
+            if job_title_element
+            else None
+        )
+
+        location_element = job_listing.find(
+            "span", id=lambda x: x and x.startswith("allowedlocation-")
+        )
+        location = location_element.text.strip() if location_element else None
+
+        job_age_element = job_listing.find(
+            "div", id=lambda x: x and x.startswith("job-age-")
+        )
+        job_age = job_age_element.text.strip() if job_age_element else None
+
+        remote_option = job_listing.find(
+            "li", id=lambda x: x and x.startswith("remoteoption-")
+        )
+        remote = remote_option.text.strip() if remote_option else None
+
+        job_schedule = job_listing.find(
+            "li", id=lambda x: x and x.startswith("jobschedule-")
+        )
+        schedule = job_schedule.text.strip() if job_schedule else None
+
+        job_type = job_listing.find("li", id=lambda x: x and x.startswith("jobTypes-"))
+        job_type_text = job_type.text.strip() if job_type else None
+
+        salary_range = job_listing.find(
+            "li", id=lambda x: x and x.startswith("salartRange-")
+        )
+        salary = salary_range.text.strip() if salary_range else None
+
+        description = job_listing.find("p", class_="sc-jv5lm6-4")
+        job_description = description.text.strip() if description else None
+
+        job = {
+            "title": job_title,
+            "location": location,
+            "link": link,
+            "posted_day": job_age,
+            "remote": remote,
+            "schedule": schedule,
+            "job_type": job_type_text,
+            "salary": salary,
+            "description": job_description,
+        }
+
+        return job
diff --git a/src/test/flexjobs_test.py b/src/test/flexjobs_test.py
@@ -0,0 +1,26 @@
+import unittest
+
+from scrape_up.flexjobs import FlexJobs
+
+
+class TestFlexJobs(unittest.TestCase):
+    def test_get_jobs_with_valid_search_query(self):
+        flexjobs = FlexJobs("python developer")
+        jobs = flexjobs.get_jobs()
+        self.assertTrue(len(jobs) > 0, "No jobs found for valid search query")
+
+    def test_get_jobs_with_location_query(self):
+        flexjobs = FlexJobs("python developer", "New York")
+        jobs = flexjobs.get_jobs()
+        self.assertTrue(len(jobs) > 0, "No jobs found for valid location query")
+
+    def test_get_jobs_with_min_jobs_limit(self):
+        flexjobs = FlexJobs("python developer", min_jobs=5)
+        jobs = flexjobs.get_jobs()
+        self.assertTrue(
+            len(jobs) >= 5, "Number of jobs retrieved exceeds max jobs limit"
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .flexjobs import FlexJobs

		__all__ = ["FlexJobs"]