Skip to content

Commit

Permalink
Merge pull request #458 from MrTpat/execution-chains
Browse files Browse the repository at this point in the history
  • Loading branch information
feder-cr authored Oct 15, 2024
2 parents e3cb3dc + 86f6b2c commit dceae26
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 24 deletions.
35 changes: 12 additions & 23 deletions src/aihawk_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from src.job import Job
from src.aihawk_easy_applier import AIHawkEasyApplier
from loguru import logger
from src.extractors.extraction_chains import EXTRACTORS


class EnvironmentKeys:
Expand Down Expand Up @@ -46,6 +47,7 @@ def __init__(self, driver):
self.easy_applier_component = None
self.job_application_profile = None
self.seen_jobs = []
self.extractor = None
logger.debug("AIHawkJobManager initialized successfully")

def set_parameters(self, parameters):
Expand Down Expand Up @@ -253,29 +255,16 @@ def get_jobs_from_page(self):
return []

def apply_jobs(self):
try:
# Check if no matching jobs are found on the current page
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
logger.debug("No matching jobs found on this page, skipping")
return
except NoSuchElementException:
pass

# Find the job results container and job elements
job_results = self.driver.find_element(By.CLASS_NAME, "jobs-search-results-list")

# utils.scroll_slow(self.driver, job_results)
# utils.scroll_slow(self.driver, job_results, step=300, reverse=True)

job_list_elements = job_results.find_elements(By.CLASS_NAME, 'jobs-search-results__list-item')

if not job_list_elements:
utils.printyellow("No job class elements found on page, moving to next page.")
logger.debug("No job class elements found on page, skipping")
return

job_list = [Job(*self.extract_job_information_from_tile(job_element)) for job_element in job_list_elements]
job_list = []
if self.extractor is not None: # we found a working extractor
job_list = self.extractor.get_job_list(self.driver)
else:
for e in EXTRACTORS:
extracted_jobs = e.get_job_list(self.driver)
if len(extracted_jobs) > 0:
job_list = extracted_jobs # break when we find a valid extractor
self.extractor = e
break

for job in job_list:
logger.debug(f"Starting applicant count search for job: {job.title} at {job.company}")
Expand Down
Empty file added src/extractors/__init__.py
Empty file.
230 changes: 230 additions & 0 deletions src/extractors/extraction_chains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
from abc import ABC, abstractmethod
from src.job import Job
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from loguru import logger


class Extractor(ABC):
    """Strategy interface for extracting job listings from the LinkedIn jobs page.

    Each concrete extractor targets one variant of the page markup; the
    manager tries them in turn until one yields jobs.
    """

    @abstractmethod
    def get_job_list(self, driver) -> list[Job]:
        """Return the Job objects visible on the current results page."""


# Extracts jobs from the legacy "jobs-search-results__list-item" tile markup.
class Extractor1(Extractor):
    """Extraction strategy for the legacy 'jobs-search-results__list-item' markup."""

    def get_job_list(self, driver) -> list[Job]:
        """Return the jobs visible on the current results page.

        Always returns a list (possibly empty) so callers can take
        ``len()`` of the result without a ``None`` check.
        """
        try:
            # LinkedIn renders an explicit banner when a search has no matches.
            no_jobs_element = driver.find_element(
                By.CLASS_NAME, "jobs-search-two-pane__no-results-banner--expand"
            )
            if (
                "No matching jobs found" in no_jobs_element.text
                or "unfortunately, things aren" in driver.page_source.lower()
            ):
                logger.debug("No matching jobs found on this page, skipping")
                return []
        except NoSuchElementException:
            pass

        # Guard the container lookup: find_elements returns [] when absent,
        # and indexing [0] on it would raise IndexError and break the chain.
        containers = driver.find_elements(
            By.CLASS_NAME, "scaffold-layout__list-container"
        )
        if not containers:
            logger.debug("No results container found on page, skipping")
            return []

        job_list_elements = containers[0].find_elements(
            By.CLASS_NAME, "jobs-search-results__list-item"
        )

        if not job_list_elements:
            logger.debug("No job class elements found on page, skipping")
            # Fix: return [] instead of falling through with a bare return
            # (None), which violated the list[Job] contract and crashed
            # callers that call len() on the result.
            return []

        job_list = [
            Job(*self.extract_job_information_from_tile(job_element))
            for job_element in job_list_elements
        ]
        # Keep only tiles whose link could actually be parsed.
        return list(filter(lambda j: len(j.link) > 0, job_list))

    def extract_job_information_from_tile(self, job_tile):
        """Extract (title, company, location, link, apply_method) from one tile.

        Missing fields are logged and left as empty strings instead of
        aborting the whole extraction.
        """
        logger.debug("Extracting job information from tile")
        job_title, company, job_location, apply_method, link = "", "", "", "", ""
        try:
            job_title = (
                job_tile.find_element(By.CLASS_NAME, "job-card-list__title")
                .find_element(By.TAG_NAME, "strong")
                .text
            )
            # Strip tracking query parameters from the job link.
            link = (
                job_tile.find_element(By.CLASS_NAME, "job-card-list__title")
                .get_attribute("href")
                .split("?")[0]
            )
            company = job_tile.find_element(
                By.CLASS_NAME, "job-card-container__primary-description"
            ).text
            logger.debug(f"Job information extracted: {job_title} at {company}")
        except NoSuchElementException:
            logger.warning("Some job information (title, link, or company) is missing.")
        try:
            job_location = job_tile.find_element(
                By.CLASS_NAME, "job-card-container__metadata-item"
            ).text
        except NoSuchElementException:
            logger.warning("Job location is missing.")
        try:
            apply_method = job_tile.find_element(
                By.CLASS_NAME, "job-card-container__apply-method"
            ).text
        except NoSuchElementException:
            apply_method = "Applied"
            logger.warning("Apply method not found, assuming 'Applied'.")

        # Tuple order is (title, company, location, link, apply_method),
        # matching the other extractors; Job(*...) relies on this order.
        return job_title, company, job_location, link, apply_method


class Extractor2(Extractor):
    """Extraction strategy using an explicit wait and CSS selectors on the
    'scaffold-layout__list-container' markup."""

    def get_job_list(self, driver) -> list[Job]:
        """Return the jobs on the current page, or [] if this strategy's
        selectors do not match (so the chain can try the next extractor)."""
        try:
            # Wait for the job list container to be present.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "scaffold-layout__list-container")
                )
            )

            job_list_container = driver.find_element(
                By.CLASS_NAME, "scaffold-layout__list-container"
            )

            # Find all job items within the container.
            job_list_elements = job_list_container.find_elements(
                By.CSS_SELECTOR,
                "li.ember-view.jobs-search-results__list-item.occludable-update.p0.relative.scaffold-layout__list-item",
            )

            logger.debug(f"Number of job elements found: {len(job_list_elements)}")

            if not job_list_elements:
                raise Exception("No job elements found on page")

            job_list = [
                Job(*self.extract_job_information_from_tile(job_element))
                for job_element in job_list_elements
            ]
            return list(filter(lambda j: len(j.link) > 0, job_list))
        except Exception as e:
            # Fix: log the failure instead of silently swallowing it; still
            # return [] so the extractor chain can fall through to the next
            # strategy rather than crash.
            logger.debug(f"Extractor2 failed to extract jobs: {e}")
            return []

    def extract_job_information_from_tile(self, job_tile):
        """Extract (title, company, location, link, apply_method) from one tile.

        Fields are filled in order; a missing element leaves the remaining
        fields as empty strings.
        """
        job_title, company, job_location, apply_method, link = "", "", "", "", ""
        try:
            job_title = job_tile.find_element(
                By.CSS_SELECTOR, "a.job-card-list__title"
            ).text
            # Strip tracking query parameters from the job link.
            link = (
                job_tile.find_element(By.CSS_SELECTOR, "a.job-card-list__title")
                .get_attribute("href")
                .split("?")[0]
            )
            company = job_tile.find_element(
                By.CSS_SELECTOR, ".job-card-container__primary-description"
            ).text
            job_location = job_tile.find_element(
                By.CSS_SELECTOR, ".job-card-container__metadata-item"
            ).text
            apply_method = job_tile.find_element(
                By.CSS_SELECTOR, ".job-card-container__apply-method"
            ).text
        except NoSuchElementException as e:
            # Fix: use the module logger instead of a bare print.
            logger.warning(f"Error extracting job information: {str(e)}")

        return job_title, company, job_location, link, apply_method



# Extracts jobs from the newer "job-card-job-posting-card-wrapper" tile markup.
class Extractor3(Extractor):
    """Extraction strategy for the 'job-card-job-posting-card-wrapper' markup."""

    def get_job_list(self, driver) -> list[Job]:
        """Return the jobs visible on the current results page.

        Always returns a list (possibly empty) so callers can take
        ``len()`` of the result without a ``None`` check.
        """
        try:
            # LinkedIn renders an explicit banner when a search has no matches.
            no_jobs_element = driver.find_element(
                By.CLASS_NAME, "jobs-search-two-pane__no-results-banner--expand"
            )
            if (
                "No matching jobs found" in no_jobs_element.text
                or "unfortunately, things aren" in driver.page_source.lower()
            ):
                logger.debug("No matching jobs found on this page, skipping")
                return []
        except NoSuchElementException:
            pass

        # Guard the container lookup: find_elements returns [] when absent,
        # and indexing [0] on it would raise IndexError and break the chain.
        containers = driver.find_elements(
            By.CLASS_NAME, "scaffold-layout__list-container"
        )
        if not containers:
            logger.debug("No results container found on page, skipping")
            return []

        job_list_elements = containers[0].find_elements(
            By.CLASS_NAME, "jobs-search-results__list-item"
        )

        if not job_list_elements:
            logger.debug("No job class elements found on page, skipping")
            # Fix: return [] instead of a bare return (None), which violated
            # the list[Job] contract and crashed callers using len().
            return []

        job_list = [
            Job(*self.extract_job_information_from_tile(job_element))
            for job_element in job_list_elements
        ]
        # Keep only tiles whose link could actually be parsed.
        return list(filter(lambda j: len(j.link) > 0, job_list))

    def extract_job_information_from_tile(self, job_tile):
        """Extract (title, company, location, link, apply_method) from one tile.

        Missing fields are logged and left as empty strings instead of
        aborting the whole extraction.
        """
        job_title = link = company = job_location = apply_method = ""
        logger.debug("Extracting job information from tile")

        try:
            # Extract job title.
            job_title = (
                job_tile.find_element(
                    By.CLASS_NAME, "job-card-job-posting-card-wrapper__title"
                )
                .find_element(By.TAG_NAME, "strong")
                .text
            )

            # Extract job link (query string retained, unlike Extractor1/2).
            link = (
                job_tile.find_element(By.CLASS_NAME, "app-aware-link")
                .get_attribute("href")
            )

            # Extract company name.
            company = job_tile.find_element(
                By.CLASS_NAME, "artdeco-entity-lockup__subtitle"
            ).text

            # Extract job location.
            job_location = job_tile.find_element(
                By.CLASS_NAME, "artdeco-entity-lockup__caption"
            ).text

            # Apply method (if it exists).
            try:
                apply_method = job_tile.find_element(
                    By.CLASS_NAME, "job-card-job-posting-card-wrapper__footer-item"
                ).text
            except NoSuchElementException:
                logger.warning("Apply method not found for this job tile.")

            logger.debug(
                f"Job information extracted: {job_title} at {company}, located in {job_location}"
            )
        except NoSuchElementException:
            logger.warning(
                "Some job information (title, link, or company) could not be parsed."
            )

        # Tuple order is (title, company, location, link, apply_method),
        # matching the other extractors; Job(*...) relies on this order.
        return job_title, company, job_location, link, apply_method


EXTRACTORS = [Extractor1(), Extractor2(), Extractor3()]
2 changes: 1 addition & 1 deletion tests/test_aihawk_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def test_apply_jobs_no_jobs(mocker, job_manager):
job_manager.apply_jobs()

# Ensure it attempted to find the job results list
assert job_manager.driver.find_element.call_count == 1
assert job_manager.driver.find_element.call_count > 0


def test_apply_jobs_with_jobs(mocker, job_manager):
Expand Down

0 comments on commit dceae26

Please sign in to comment.