quick fix for LinkedIn Automation #961
@@ -5,6 +5,7 @@
 from itertools import product
 from pathlib import Path
 import traceback
+from turtle import color

 from inputimeout import inputimeout, TimeoutOccurred
 from selenium.common.exceptions import NoSuchElementException
@@ -252,8 +253,10 @@ def get_jobs_from_page(self, scroll=False):
            pass

        try:
-            # XPath query to find the ul tag with class scaffold-layout__list-container
-            jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
+            # XPath query to find the ul tag with class scaffold-layout__list
+            jobs_xpath_query = (
+                "//div[contains(@class, 'scaffold-layout__list-detail-container')]//ul"
+            )
            jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query)

            if scroll:
@@ -263,7 +266,10 @@ def get_jobs_from_page(self, scroll=False):
                browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement)
                browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True)

-            job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")
+            job_element_list = jobs_container.find_elements(
+                By.XPATH,
+                ".//li[contains(@class, 'scaffold-layout__list-item') and contains(@class, 'ember-view')]",
+            )

            if not job_element_list:
                logger.debug("No job class elements found on page, skipping.")
@@ -302,7 +308,7 @@ def apply_jobs(self):
        for job in job_list:

            logger.debug(f"Starting applicant for job: {job.title} at {job.company}")
-            #TODO fix apply threshold
+            # TODO fix apply threshold
            """
            # Initialize applicants_count as None
            applicants_count = None
@@ -354,7 +360,6 @@ def apply_jobs(self):

            # Continue with the job application process regardless of the applicants count check
            """
-

            if self.is_previously_failed_to_apply(job.link):
                logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...")
@@ -391,10 +396,10 @@ def write_to_file(self, job : Job, file_name, reason=None):
            "job_location": job.location,
            "pdf_path": pdf_path
        }

        if reason:
            data["reason"] = reason

        file_path = self.output_file_directory / f"{file_name}.json"
        if not file_path.exists():
            with open(file_path, 'w', encoding='utf-8') as f:
@@ -427,8 +432,7 @@ def get_base_search_url(self, parameters):
        if working_type_filter:
            url_parts.append(f"f_WT={'%2C'.join(working_type_filter)}")

-        experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if
-                             v]
+        experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if v]
        if experience_levels:
            url_parts.append(f"f_E={','.join(experience_levels)}")
        url_parts.append(f"distance={parameters['distance']}")
@@ -454,29 +458,56 @@ def next_job_page(self, position, location, job_page):
        self.driver.get(
            f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={encoded_position}{location}&start={job_page * 25}")


    def job_tile_to_job(self, job_tile) -> Job:
        logger.debug("Extracting job information from tile")
        job = Job()

        # Extract job Title
        try:
-            job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').find_element(By.TAG_NAME, 'strong').text
+            title_element = job_tile.find_element(
+                By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__title')]//a"
+            )
+            job.title = title_element.text.strip()
Review comment: I've got job titles duplicated with this code:
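A possible cause (an assumption, not confirmed in this thread) is that the lockup title link renders the title twice, once in a visible span and once in a visually-hidden span, so `.text` returns both copies. A minimal defensive sketch that keeps only the first non-empty line of the extracted text:

    raw_title = title_element.text.strip()
    # If the anchor renders the title twice (visible + visually-hidden span),
    # .text may contain it on two lines; keep only the first non-empty line.
    job.title = next(
        (line.strip() for line in raw_title.splitlines() if line.strip()),
        raw_title,
    )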
            logger.debug(f"Job title extracted: {job.title}")
        except NoSuchElementException:
            logger.warning("Job title is missing.")


        # Extract job Link
        try:
-            job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').get_attribute('href').split('?')[0]
+            job.link = title_element.get_attribute("href").split("?")[0]
Review comment: There's a potential unhandled exception here. If the title lookup above raised an exception, title_element is never assigned, so this line will fail with a NameError that the except NoSuchElementException handler won't catch.
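A minimal sketch of one way to guard against that, assuming the surrounding try/except structure stays as in this diff:

    title_element = None  # ensure the name exists even if the title lookup fails
    try:
        title_element = job_tile.find_element(
            By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__title')]//a"
        )
        job.title = title_element.text.strip()
    except NoSuchElementException:
        logger.warning("Job title is missing.")

    # Extract the job link only if the title element was actually found
    if title_element is not None:
        job.link = title_element.get_attribute("href").split("?")[0]
    else:
        logger.warning("Job link is missing because the title element was not found.")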
            logger.debug(f"Job link extracted: {job.link}")
        except NoSuchElementException:
            logger.warning("Job link is missing.")

        # Extract Company Name
Review comment: Company name and location are split by a dot now. I got the company name along with the location when I tried this.
Review comment: Thanks for all of your advice!!! I will try to apply it.
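If the subtitle really contains both values separated by a middle dot (an assumption based on the comment above, e.g. "Acme Corp · Berlin, Germany"), a minimal sketch for splitting them:

    subtitle_text = job_tile.find_element(
        By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]"
    ).text.strip()
    # Split on the middle dot that appears to separate company and location.
    parts = [part.strip() for part in subtitle_text.split("·")]
    job.company = parts[0]
    if len(parts) > 1 and not job.location:
        job.location = parts[1]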
        try:
-            job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
+            job.company = job_tile.find_element(
+                By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span"
+            ).text.strip()
            logger.debug(f"Job company extracted: {job.company}")
        except NoSuchElementException as e:
-            logger.warning(f'Job company is missing. {e} {traceback.format_exc()}')
+            logger.warning(f"Job company is missing. {e} {traceback.format_exc()}")

+        # Extract job Location
+        try:
+            job.location = job_tile.find_element(
+                By.XPATH, ".//ul[contains(@class, 'job-card-container__metadata-wrapper')]//li"
+            ).text.strip()
+            logger.debug(f"Job location extracted: {job.location}")
+        except NoSuchElementException:
+            logger.warning("Job location is missing.")
+
+        # Extract job State
+        try:
+            job_state = job_tile.find_element(
+                By.XPATH,
+                ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]",
+            ).text
+            logger.debug(f"Job state extracted: {job_state}")
+            job.apply_method = job_state
+        except NoSuchElementException as e:
+            logger.warning(f"Apply method and state not found. {e} {traceback.format_exc()}")

        # Extract job ID from job url
        try:
            match = re.search(r'/jobs/view/(\d+)/', job.link)
@@ -488,23 +519,6 @@ def job_tile_to_job(self, job_tile) -> Job:
        except Exception as e:
            logger.warning(f"Failed to extract job ID: {e}", exc_info=True)

-        try:
-            job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text
-        except NoSuchElementException:
-            logger.warning("Job location is missing.")
-
-
-        try:
-            job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text
-        except NoSuchElementException as e:
-            try:
-                # Fetching state when apply method is not found
-                job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text
-                job.apply_method = "Applied"
-                logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}')
-            except NoSuchElementException as e:
-                logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}')
-
        return job

    def is_blacklisted(self, job_title, company, link, job_location):
@@ -558,10 +572,10 @@ def is_previously_failed_to_apply(self, link):
            except json.JSONDecodeError:
                logger.error(f"JSON decode error in file: {file_path}")
                return False

            for data in existing_data:
                data_link = data['link']
                if data_link == link:
                    return True

        return False
Review comment: While the code works, the scaffold-layout__list class appears to be closer to the target ul element than scaffold-layout__list-detail-container, making it more appropriate for selection.
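A minimal sketch of the suggested selector, assuming (as the comment implies, and unverified against the live DOM) that the list ul itself carries a class containing scaffold-layout__list:

    # Hypothetical alternative based on the review comment above: select the ul
    # via scaffold-layout__list instead of descending from the outer
    # scaffold-layout__list-detail-container div.
    jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list')]"
    jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query)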