Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quick fix for LinkedIn Automation #961

Closed
wants to merge 10 commits into from
84 changes: 49 additions & 35 deletions src/ai_hawk/job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from itertools import product
from pathlib import Path
import traceback
from turtle import color

from inputimeout import inputimeout, TimeoutOccurred
from selenium.common.exceptions import NoSuchElementException
Expand Down Expand Up @@ -252,8 +253,10 @@ def get_jobs_from_page(self, scroll=False):
pass

try:
# XPath query to find the ul tag with class scaffold-layout__list-container
jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
# XPath query to find the ul tag with class scaffold-layout__list
jobs_xpath_query = (
"//div[contains(@class, 'scaffold-layout__list-detail-container')]//ul"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While the code works, the scaffold-layout__list class appears to be closer to the target ul element than scaffold-layout__list-detail-container, making it more appropriate for selection.

)
jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query)

if scroll:
Expand All @@ -263,7 +266,10 @@ def get_jobs_from_page(self, scroll=False):
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement)
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True)

job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")
job_element_list = jobs_container.find_elements(
By.XPATH,
".//li[contains(@class, 'scaffold-layout__list-item') and contains(@class, 'ember-view')]",
)

if not job_element_list:
logger.debug("No job class elements found on page, skipping.")
Expand Down Expand Up @@ -302,7 +308,7 @@ def apply_jobs(self):
for job in job_list:

logger.debug(f"Starting applicant for job: {job.title} at {job.company}")
#TODO fix apply threshold
# TODO fix apply threshold
"""
# Initialize applicants_count as None
applicants_count = None
Expand Down Expand Up @@ -354,7 +360,6 @@ def apply_jobs(self):

# Continue with the job application process regardless of the applicants count check
"""


if self.is_previously_failed_to_apply(job.link):
logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...")
Expand Down Expand Up @@ -391,10 +396,10 @@ def write_to_file(self, job : Job, file_name, reason=None):
"job_location": job.location,
"pdf_path": pdf_path
}

if reason:
data["reason"] = reason

file_path = self.output_file_directory / f"{file_name}.json"
if not file_path.exists():
with open(file_path, 'w', encoding='utf-8') as f:
Expand Down Expand Up @@ -427,8 +432,7 @@ def get_base_search_url(self, parameters):
if working_type_filter:
url_parts.append(f"f_WT={'%2C'.join(working_type_filter)}")

experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if
v]
experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if v]
if experience_levels:
url_parts.append(f"f_E={','.join(experience_levels)}")
url_parts.append(f"distance={parameters['distance']}")
Expand All @@ -454,29 +458,56 @@ def next_job_page(self, position, location, job_page):
self.driver.get(
f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={encoded_position}{location}&start={job_page * 25}")


def job_tile_to_job(self, job_tile) -> Job:
logger.debug("Extracting job information from tile")
job = Job()

# Extract job Title
try:
job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').find_element(By.TAG_NAME, 'strong').text
title_element = job_tile.find_element(
By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__title')]//a"
)
job.title = title_element.text.strip()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've got job titles duplicated with this code: "job_title": "Senior Hardware Experience Designer\nSenior Hardware Experience Designer",
Suggestion: job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title--link').find_element(By.TAG_NAME, 'strong').text

logger.debug(f"Job title extracted: {job.title}")
except NoSuchElementException:
logger.warning("Job title is missing.")


# Extract job Link
try:
job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').get_attribute('href').split('?')[0]
job.link = title_element.get_attribute("href").split("?")[0]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s a potential unhandled exception here. If the previous line raised an exception and title_element was never assigned, this line will fail with a NameError rather than NoSuchElementException, causing the entire program to stop abruptly.
Suggestion: job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title--link').get_attribute('href').split('?')[0]

logger.debug(f"Job link extracted: {job.link}")
except NoSuchElementException:
logger.warning("Job link is missing.")

# Extract Company Name
Copy link

@PetrShchukin PetrShchukin Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Company name and location are now separated by a "·" character in the subtitle. I got the company name along with the location when I ran --collect.
Suggestion:

# Extract Company Name and Location
        try:
            full_text = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
            company, location = full_text.split('·')
            job.company = company.strip()
            logger.debug(f"Job company extracted: {job.company}")
            job.location = location.strip()
            logger.debug(f"Job location extracted: {job.location}")
        except NoSuchElementException as e:
            logger.warning(f'Job company and location are missing. {e} {traceback.format_exc()}')

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for all of your advice!!! I will try it and apply the suggestions.

try:
job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
job.company = job_tile.find_element(
By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span"
).text.strip()
logger.debug(f"Job company extracted: {job.company}")
except NoSuchElementException as e:
logger.warning(f'Job company is missing. {e} {traceback.format_exc()}')

logger.warning(f"Job company is missing. {e} {traceback.format_exc()}")

# Extract job Location
try:
job.location = job_tile.find_element(
By.XPATH, ".//ul[contains(@class, 'job-card-container__metadata-wrapper')]//li"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For me, job-card-container__metadata-wrapper is an empty tag.
image

).text.strip()
logger.debug(f"Job location extracted: {job.location}")
except NoSuchElementException:
logger.warning("Job location is missing.")

# Extract job State
try:
job_state = job_tile.find_element(
By.XPATH,
".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]",
).text
logger.debug(f"Job state extracted: {job_state}")
job.apply_method = job_state
except NoSuchElementException as e:
logger.warning(f"Apply method and state not found. {e} {traceback.format_exc()}")

# Extract job ID from job url
try:
match = re.search(r'/jobs/view/(\d+)/', job.link)
Expand All @@ -488,23 +519,6 @@ def job_tile_to_job(self, job_tile) -> Job:
except Exception as e:
logger.warning(f"Failed to extract job ID: {e}", exc_info=True)

try:
job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text
except NoSuchElementException:
logger.warning("Job location is missing.")


try:
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text
except NoSuchElementException as e:
try:
# Fetching state when apply method is not found
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text
job.apply_method = "Applied"
logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}')
except NoSuchElementException as e:
logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}')

return job

def is_blacklisted(self, job_title, company, link, job_location):
Expand Down Expand Up @@ -558,10 +572,10 @@ def is_previously_failed_to_apply(self, link):
except json.JSONDecodeError:
logger.error(f"JSON decode error in file: {file_path}")
return False

for data in existing_data:
data_link = data['link']
if data_link == link:
return True

return False
Loading