Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quick fix for LinkedIn Automation #961

Closed
wants to merge 10 commits into from
84 changes: 49 additions & 35 deletions src/ai_hawk/job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from itertools import product
from pathlib import Path
import traceback
from turtle import color

from inputimeout import inputimeout, TimeoutOccurred
from selenium.common.exceptions import NoSuchElementException
Expand Down Expand Up @@ -252,8 +253,10 @@ def get_jobs_from_page(self, scroll=False):
pass

try:
# XPath query to find the ul tag with class scaffold-layout__list-container
jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
# XPath query to find the ul tag with class scaffold-layout__list
jobs_xpath_query = (
"//div[contains(@class, 'scaffold-layout__list-detail-container')]//ul"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While the code works, the scaffold-layout__list class appears to be closer to the target ul element than scaffold-layout__list-detail-container, making it more appropriate for selection.

)
jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query)

if scroll:
Expand All @@ -263,7 +266,10 @@ def get_jobs_from_page(self, scroll=False):
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement)
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True)

job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")
job_element_list = jobs_container.find_elements(
By.XPATH,
".//li[contains(@class, 'scaffold-layout__list-item') and contains(@class, 'ember-view')]",
)

if not job_element_list:
logger.debug("No job class elements found on page, skipping.")
Expand Down Expand Up @@ -302,7 +308,7 @@ def apply_jobs(self):
for job in job_list:

logger.debug(f"Starting applicant for job: {job.title} at {job.company}")
#TODO fix apply threshold
# TODO fix apply threshold
"""
# Initialize applicants_count as None
applicants_count = None
Expand Down Expand Up @@ -354,7 +360,6 @@ def apply_jobs(self):

# Continue with the job application process regardless of the applicants count check
"""


if self.is_previously_failed_to_apply(job.link):
logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...")
Expand Down Expand Up @@ -391,10 +396,10 @@ def write_to_file(self, job : Job, file_name, reason=None):
"job_location": job.location,
"pdf_path": pdf_path
}

if reason:
data["reason"] = reason

file_path = self.output_file_directory / f"{file_name}.json"
if not file_path.exists():
with open(file_path, 'w', encoding='utf-8') as f:
Expand Down Expand Up @@ -427,8 +432,7 @@ def get_base_search_url(self, parameters):
if working_type_filter:
url_parts.append(f"f_WT={'%2C'.join(working_type_filter)}")

experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if
v]
experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if v]
if experience_levels:
url_parts.append(f"f_E={','.join(experience_levels)}")
url_parts.append(f"distance={parameters['distance']}")
Expand All @@ -454,29 +458,56 @@ def next_job_page(self, position, location, job_page):
self.driver.get(
f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={encoded_position}{location}&start={job_page * 25}")


def job_tile_to_job(self, job_tile) -> Job:
logger.debug("Extracting job information from tile")
job = Job()

# Extract job Title
try:
job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').find_element(By.TAG_NAME, 'strong').text
title_element = job_tile.find_element(
By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__title')]//a"
)
job.title = title_element.text.strip()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've got job titles duplicated with this code: "job_title": "Senior Hardware Experience Designer\nSenior Hardware Experience Designer",
Suggestion: job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title--link').find_element(By.TAG_NAME, 'strong').text

logger.debug(f"Job title extracted: {job.title}")
except NoSuchElementException:
logger.warning("Job title is missing.")


# Extract job Link
try:
job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').get_attribute('href').split('?')[0]
job.link = title_element.get_attribute("href").split("?")[0]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s a potential unhandled exception here. If the previous line raised an exception and title_element was never assigned, this line will fail with a NameError rather than NoSuchElementException, causing the entire program to stop abruptly.
Suggestion: job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title--link').get_attribute('href').split('?')[0]

logger.debug(f"Job link extracted: {job.link}")
except NoSuchElementException:
logger.warning("Job link is missing.")

# Extract Company Name
Copy link

@PetrShchukin PetrShchukin Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Company name and location are now separated by a "·" character in the subtitle. I got the company name along with the location when I ran --collect.
Suggestion:

# Extract Company Name and Location
        try:
            full_text = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
            company, location = full_text.split('·')
            job.company = company.strip()
            logger.debug(f"Job company extracted: {job.company}")
            job.location = location.strip()
            logger.debug(f"Job location extracted: {job.location}")
        except NoSuchElementException as e:
            logger.warning(f'Job company and location are missing. {e} {traceback.format_exc()}')

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for all of your advice!!! I will try it and apply the suggestions.

try:
job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
job.company = job_tile.find_element(
By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span"
).text.strip()
logger.debug(f"Job company extracted: {job.company}")
except NoSuchElementException as e:
logger.warning(f'Job company is missing. {e} {traceback.format_exc()}')

logger.warning(f"Job company is missing. {e} {traceback.format_exc()}")

# Extract job Location
try:
job.location = job_tile.find_element(
By.XPATH, ".//ul[contains(@class, 'job-card-container__metadata-wrapper')]//li"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For me, job-card-container__metadata-wrapper is an empty tag.
image

).text.strip()
logger.debug(f"Job location extracted: {job.location}")
except NoSuchElementException:
logger.warning("Job location is missing.")

# Extract job State
try:
job_state = job_tile.find_element(
By.XPATH,
".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]",
).text
logger.debug(f"Job state extracted: {job_state}")
job.apply_method = job_state
except NoSuchElementException as e:
logger.warning(f"Apply method and state not found. {e} {traceback.format_exc()}")

# Extract job ID from job url
try:
match = re.search(r'/jobs/view/(\d+)/', job.link)
Expand All @@ -488,23 +519,6 @@ def job_tile_to_job(self, job_tile) -> Job:
except Exception as e:
logger.warning(f"Failed to extract job ID: {e}", exc_info=True)

try:
job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text
except NoSuchElementException:
logger.warning("Job location is missing.")


try:
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text
except NoSuchElementException as e:
try:
# Fetching state when apply method is not found
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text
job.apply_method = "Applied"
logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}')
except NoSuchElementException as e:
logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}')

return job

def is_blacklisted(self, job_title, company, link, job_location):
Expand Down Expand Up @@ -558,10 +572,10 @@ def is_previously_failed_to_apply(self, link):
except json.JSONDecodeError:
logger.error(f"JSON decode error in file: {file_path}")
return False

for data in existing_data:
data_link = data['link']
if data_link == link:
return True

return False
Loading