Skip to content

Commit 2b7b740

Browse files
authored
Merge pull request #1144 from aparupganguly/feature/o3-mini-job-resource
Feature/o3 mini job resource extractor
2 parents ac5c88b + 46f05a7 commit 2b7b740

File tree

1 file changed

+265
-0
lines changed

1 file changed

+265
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
import os
2+
import json
3+
import time
4+
import requests
5+
from dotenv import load_dotenv
6+
from openai import OpenAI
7+
from serpapi.google_search import GoogleSearch
8+
9+
class Colors:
    """ANSI escape sequences used to colorize terminal output.

    Each attribute is a plain string meant to be interpolated into printed
    text; ``RESET`` is emitted after a colored span to end it.
    """

    CYAN = '\033[96m'
    YELLOW = '\033[93m'
    GREEN = '\033[92m'
    RED = '\033[91m'
    RESET = '\033[0m'
15+
16+
# Load variables from a .env file into the process environment before the
# keys below are read.
load_dotenv()

# Initialize clients
# os.getenv returns None for a variable that is not set, so a missing key
# is only noticed later, when the corresponding service is called.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
serp_api_key = os.getenv("SERP_API_KEY")
22+
23+
def extract_job_requirements(url, api_key):
    """Extract essential job requirements using Firecrawl.

    Submits an extraction job for ``url`` to the Firecrawl ``/v1/extract``
    endpoint and then waits for its result via ``poll_extraction_result``.

    Args:
        url: Address of the job posting to analyse.
        api_key: Firecrawl API key, sent as a bearer token.

    Returns:
        Whatever ``poll_extraction_result`` returns for the submitted job,
        or ``None`` when the request fails, the response does not report
        success, the response carries no job id, or polling yields nothing.
        This function never raises; failures are printed instead.
    """
    print(f"{Colors.YELLOW}Extracting job requirements...{Colors.RESET}")

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}'
    }

    prompt = """
    Extract only:
    - job_title: position title (string)
    - required_skills: top 5 technical skills (array)
    - experience_level: years required (string)
    """

    payload = {
        "urls": [url],
        "prompt": prompt,
        "enableWebSearch": False
    }

    try:
        response = requests.post(
            "https://api.firecrawl.dev/v1/extract",
            headers=headers,
            json=payload,
            timeout=30
        )

        data = response.json()
        if not isinstance(data, dict) or not data.get('success'):
            # Show the API's own explanation when it supplies one, so the
            # caller's generic failure message is not the only clue.
            reason = data.get('error') if isinstance(data, dict) else None
            if reason:
                print(f"{Colors.RED}Firecrawl rejected the request: {reason}{Colors.RESET}")
            return None

        extraction_id = data.get('id')
        if not extraction_id:
            # Without an id the status URL would end in "/None" and every
            # polling attempt would be wasted.
            print(f"{Colors.RED}Firecrawl response did not include a job id.{Colors.RESET}")
            return None

        return poll_extraction_result(extraction_id, api_key)

    except Exception as e:
        # Best-effort boundary: network errors and malformed JSON are
        # reported and turned into a None result rather than propagated.
        print(f"{Colors.RED}Error extracting job requirements: {e}{Colors.RESET}")
        return None
62+
63+
def poll_extraction_result(extraction_id, api_key, interval=5, max_attempts=12):
    """Poll for extraction results.

    Repeatedly requests the status of a Firecrawl extraction job until the
    response reports success together with non-empty data.

    Args:
        extraction_id: Job id used to build the status URL.
        api_key: Firecrawl API key, sent as a bearer token.
        interval: Seconds to wait between two consecutive attempts.
        max_attempts: Maximum number of status requests to make.

    Returns:
        The ``data`` value of the first successful response, or ``None``
        when no attempt succeeds (including ``max_attempts <= 0``).
    """
    url = f"https://api.firecrawl.dev/v1/extract/{extraction_id}"
    headers = {'Authorization': f'Bearer {api_key}'}

    for attempt in range(max_attempts):
        # Pause between attempts only: not before the first request, not
        # after the last one, and also after an attempt that raised, so a
        # flaky connection is not hammered in a tight loop.
        if attempt:
            time.sleep(interval)
        try:
            response = requests.get(url, headers=headers, timeout=30)
            data = response.json()
            if data.get('success') and data.get('data'):
                return data['data']
            # NOTE(review): assumes the status endpoint reports terminal
            # states as status "failed"/"cancelled" - confirm against the
            # Firecrawl API reference. If it does not, this never triggers.
            if data.get('status') in ('failed', 'cancelled'):
                return None
        except Exception:
            print(f"{Colors.YELLOW}Polling attempt failed, retrying...{Colors.RESET}")
    return None
79+
80+
def rank_and_summarize_resources(resources, skills):
    """Use OpenAI to rank and summarize learning resources.

    Args:
        resources: Mapping of category name to a list of dicts, each with
            ``"title"`` and ``"url"`` keys.
        skills: Skills the resources are meant to teach; they are named in
            the prompt.

    Returns:
        The ``"ranked_resources"`` list parsed from the model's JSON reply,
        an empty list when there are no resources to rank (no API call is
        made in that case), or ``None`` when anything fails. Failures are
        printed rather than raised.
    """
    try:
        # Flatten the per-category lists into one list that remembers the
        # category of every entry.
        all_resources = [
            {"category": category, "title": item["title"], "url": item["url"]}
            for category, items in resources.items()
            for item in items
        ]

        # Nothing to rank: skip the model call entirely.
        if not all_resources:
            return []

        # Create prompt for OpenAI
        skills_str = ", ".join(str(skill) for skill in skills)
        prompt = f"""Given these learning resources for skills ({skills_str}),
        rank them by relevance and quality, and provide a brief summary:

        Resources:
        {json.dumps(all_resources, indent=2)}

        For each resource, provide:
        1. Relevance score (1-10)
        2. Brief summary (max 2 sentences)
        3. Why it's useful for the target skills

        Format as JSON with structure:
        {{
            "ranked_resources": [
                {{
                    "category": "...",
                    "title": "...",
                    "url": "...",
                    "relevance_score": X,
                    "summary": "...",
                    "usefulness": "..."
                }}
            ]
        }}"""

        response = client.chat.completions.create(
            model="o3-mini",
            messages=[
                {"role": "system", "content": "You are a technical learning resource curator."},
                {"role": "user", "content": prompt}
            ],
        )

        # Parse and return ranked resources. The reply may be wrapped in a
        # Markdown code fence, which json.loads cannot digest.
        content = response.choices[0].message.content or ""
        ranked_data = json.loads(_strip_code_fence(content))
        return ranked_data["ranked_resources"]

    except Exception as e:
        print(f"{Colors.RED}Error in ranking resources: {e}{Colors.RESET}")
        return None


def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if any."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line, which may carry a language tag ("json").
    _, _, body = stripped.partition("\n")
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()
135+
136+
def get_prep_resources(skills):
    """Get and rank learning resources for top skills.

    Runs one Google search (through SerpAPI) for the first two skills,
    sorts the hits into categories by keywords found in their titles, and
    passes the categorised hits to ``rank_and_summarize_resources``.

    Args:
        skills: Sequence of skill names; only the first two are searched.

    Returns:
        The result of ``rank_and_summarize_resources``, an empty list when
        there are no skills or no hit matched any category, or ``None``
        when an error occurs. Errors are printed rather than raised.
    """
    # Without skills the query would degenerate to generic filler words and
    # waste a search request.
    if not skills:
        return []

    # Title keywords that place a search hit in each category; the first
    # matching category wins, in this order.
    category_keywords = (
        ("Tutorials", ("tutorial", "guide")),
        ("Practice", ("practice", "exercise")),
        ("Documentation", ("doc", "reference")),
    )

    try:
        core_resources = {
            "Tutorials": [],
            "Practice": [],
            "Documentation": []
        }

        # Search for top 2 skills to reduce API usage
        top_skills = skills[:2]
        search = GoogleSearch({
            "q": f"learn {' '.join(top_skills)} tutorial practice exercises documentation",
            "api_key": serp_api_key,
            "num": 6
        })
        results = search.get_dict().get("organic_results", [])

        for result in results[:6]:
            url = result.get("link", "")
            title = result.get("title", "")
            lowered = title.lower()

            # Hits whose title matches no keyword are intentionally left out.
            for category, keywords in category_keywords:
                if any(keyword in lowered for keyword in keywords):
                    core_resources[category].append({"title": title, "url": url})
                    break

        # Nothing was categorised: there is nothing to rank.
        if not any(core_resources.values()):
            return []

        # Rank and summarize resources
        return rank_and_summarize_resources(core_resources, top_skills)

    except Exception as e:
        print(f"{Colors.RED}Error getting resources: {e}{Colors.RESET}")
        return None
172+
173+
def generate_weekly_plan(skills):
    """Generate a concise weekly preparation plan.

    Args:
        skills: Sequence of skill names, or ``None`` (treated as empty).

    Returns:
        A list of week dicts with ``"focus"``, ``"skills"`` and ``"tasks"``
        keys. The "Advanced Topics" week is included only when more than
        two skills are given, so the plan has three or four entries. Skill
        slices are copies; the caller's sequence is never stored in the
        plan.
    """
    # Extraction may deliver a null skills field; plan as if it were empty.
    if skills is None:
        skills = []

    # Week 1: Fundamentals (slicing also copies a sequence shorter than 2)
    weeks = [{
        "focus": "Fundamentals",
        "skills": skills[:2],
        "tasks": ["Study core concepts", "Complete basic tutorials"]
    }]

    # Week 2: Advanced Concepts
    if len(skills) > 2:
        weeks.append({
            "focus": "Advanced Topics",
            "skills": skills[2:4],
            "tasks": ["Deep dive into advanced features", "Practice exercises"]
        })

    # Projects & Practice
    weeks.append({
        "focus": "Projects",
        "skills": "All core skills",
        "tasks": ["Build small projects", "Solve practice problems"]
    })

    # Interview Prep
    weeks.append({
        "focus": "Interview Prep",
        "skills": "All skills",
        "tasks": ["Mock interviews", "Code reviews"]
    })

    return weeks
208+
209+
def format_output(job_info, ranked_resources, weeks):
    """Format output in a concise way with ranked resources.

    Args:
        job_info: Dict with optional ``job_title``, ``experience_level``
            and ``required_skills`` entries.
        ranked_resources: List of resource dicts (``title``, ``url``,
            ``relevance_score``, ``summary``, ``usefulness``), or a falsy
            value to omit the resources section.
        weeks: List of week dicts with ``focus``, ``skills`` and ``tasks``.

    Returns:
        The complete guide as one string containing ANSI color codes.
    """
    parts = [f"\n{Colors.GREEN}=== Job Preparation Guide ==={Colors.RESET}\n"]

    # Job Requirements
    parts.append(f"\n{Colors.CYAN}Position:{Colors.RESET} {job_info.get('job_title', 'N/A')}")
    parts.append(f"\n{Colors.CYAN}Experience:{Colors.RESET} {job_info.get('experience_level', 'N/A')}")
    parts.append(f"\n{Colors.CYAN}Key Skills:{Colors.RESET}")
    # "or []" also covers a skills entry that is present but null.
    parts.extend(f"\n- {skill}" for skill in job_info.get('required_skills') or [])

    # Weekly Plan; the header reflects the real number of weeks, which can
    # be fewer than four.
    parts.append(f"\n\n{Colors.CYAN}{len(weeks)}-Week Plan:{Colors.RESET}")
    for i, week in enumerate(weeks, 1):
        skills = week['skills']
        if isinstance(skills, list):
            skills = ', '.join(str(skill) for skill in skills)
        parts.append(f"\n\n📅 Week {i}: {week['focus']}")
        parts.append(f"\n Skills: {skills}")
        parts.append(f"\n Tasks: {' → '.join(week['tasks'])}")

    # Ranked Learning Resources
    if ranked_resources:
        parts.append(f"\n\n{Colors.CYAN}Top Recommended Resources:{Colors.RESET}")

        # Sort resources by relevance score. The entries come from a model
        # reply, so malformed ones are skipped and odd scores tolerated.
        usable = [res for res in ranked_resources if isinstance(res, dict)]
        sorted_resources = sorted(usable, key=_relevance_score, reverse=True)

        for res in sorted_resources[:5]:  # Show top 5 resources
            parts.append(
                f"\n\n📚 {res.get('title', 'Untitled')} "
                f"(Score: {res.get('relevance_score', 'N/A')}/10)"
            )
            parts.append(f"\n {res.get('summary', '')}")
            parts.append(f"\n Why useful: {res.get('usefulness', '')}")
            parts.append(f"\n URL: {res.get('url', '')}")

    return "".join(parts)


def _relevance_score(resource):
    """Return a resource's relevance score as a float, or 0.0 if unusable."""
    try:
        return float(resource.get('relevance_score', 0))
    except (TypeError, ValueError):
        return 0.0
241+
242+
def main():
    """Main execution function.

    Prompts for a job posting URL, extracts the job requirements, gathers
    and ranks learning resources, builds the weekly plan, and prints the
    formatted guide. Problems are reported on the terminal; nothing is
    raised to the caller.
    """
    try:
        # Fail early with a precise message instead of letting the API
        # reject a request that carries no key.
        if not firecrawl_api_key:
            print(f"{Colors.RED}FIRECRAWL_API_KEY is not set.{Colors.RESET}")
            return

        job_url = input(f"{Colors.YELLOW}Enter job posting URL: {Colors.RESET}").strip()
        if not job_url:
            print(f"{Colors.RED}No job posting URL provided.{Colors.RESET}")
            return

        # Extract requirements
        job_info = extract_job_requirements(job_url, firecrawl_api_key)
        if not job_info:
            print(f"{Colors.RED}Failed to extract job requirements.{Colors.RESET}")
            return

        # Get resources and generate plan
        print(f"{Colors.YELLOW}Finding and ranking preparation resources...{Colors.RESET}")
        # Look the skills up once; "or []" covers a present-but-null entry.
        skills = job_info.get('required_skills') or []
        resources = get_prep_resources(skills)
        weeks = generate_weekly_plan(skills)

        # Display results
        print(format_output(job_info, resources, weeks))

    except KeyboardInterrupt:
        # Ctrl-C at the prompt or while waiting on a service: exit quietly.
        print(f"\n{Colors.YELLOW}Cancelled.{Colors.RESET}")
    except Exception as e:
        print(f"{Colors.RED}An error occurred: {e}{Colors.RESET}")
264+
# Run the interactive workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)