diff --git a/.gitignore b/.gitignore
index 018e3db..d394ead 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,4 +19,7 @@ resume_JD_similarity/data/**
 **chroma_db**
 **bin**
 ui/public/uploads/**
-data/uploads/**
\ No newline at end of file
+backend/test_api.py
+backend/test_ats.py
+backend/ats_report.html
+data/uploads/**
diff --git a/backend/ATS_agent/README.md b/backend/ATS_agent/README.md
new file mode 100644
index 0000000..efae911
--- /dev/null
+++ b/backend/ATS_agent/README.md
@@ -0,0 +1,175 @@
+# ATS Resume Analyzer
+
+An advanced AI-powered ATS (Applicant Tracking System) resume analysis tool. It compares a resume against a job description, estimates the likelihood of passing ATS screening, and suggests concrete improvements.
+
+## Key Features
+
+### Core Analyses
+- **Keyword matching**: How well the resume covers the job description's key keywords
+- **Experience fit**: Whether required experience and qualifications are satisfied
+- **Format & readability**: How ATS-friendly the structure and formatting are
+- **Content quality**: Achievement-oriented writing and quantification
+- **Industry fit**: Understanding of, and suitability for, the target industry/role
+
+### Advanced Features
+- **Multilingual**: Automatic Korean/English detection and analysis
+- **Multi-LLM**: Choose between OpenAI, Groq, and Google Gemini models
+- **Visual reports**: Radar chart plus a detailed HTML report
+- **Tailored suggestions**: Concrete, job-specific improvement recommendations
+
+## File Structure
+
+```
+ATS_agent/
+├── ats_analyzer_improved.py # Main entry point
+├── ats_analyzer.py          # Core ATS analyzer class
+├── analyzers.py             # Individual analysis modules
+│                            #  - KeywordAnalyzer: keyword matching
+│                            #  - ExperienceAnalyzer: experience analysis
+│                            #  - FormatAnalyzer: format analysis
+│                            #  - ContentAnalyzer: content quality
+│                            #  - ErrorAnalyzer: error checking
+│                            #  - IndustryAnalyzer: industry fit
+│                            #  - CompetitiveAnalyzer: competitiveness analysis
+├── ats_simulation.enc       # ATS keyword simulation (encrypted)
+├── report_generator.py      # HTML/text report generation
+├── config.py                # Settings and constants
+│                            #  - Language-specific patterns and templates
+│                            #  - Score weight settings
+├── utils.py                 # Utility functions
+│                            #  - Text normalization and language detection
+│                            #  - Markdown rendering
+│                            #  - Font configuration
+├── llm_handler.py           # LLM API integration
+│                            #  - OpenAI, Groq, Gemini support
+├── upstage_parser.py        # Document parsing (PDF/DOCX)
+├── .env                     # API key settings
+└── requirements.txt         # Package dependencies
+```
+
+## Installation
+
+### 1. Install required packages
+```bash
+pip install -r requirements.txt
+```
+
+### 2. Configure API keys
+Create a `.env` file and set the following API keys:
+
+```env
+# OpenAI API (GPT-4)
+OPENAI_API_KEY=your_openai_api_key_here
+
+# Groq API (optional)
+GROQ_API_KEY=your_groq_api_key_here
+
+# Google Gemini API (optional)
+GEMINI_API_KEY=your_gemini_api_key_here
+
+# Upstage Document Parser API
+UPSTAGE_API_KEY=your_upstage_api_key_here
+```
+
+## Usage
+
+### Basic run
+```bash
+python ats_analyzer_improved.py
+```
+
+### Custom configuration
+You can change settings by editing `config.py`:
+
+```python
+# Configuration
+CV_PATH = "resume.pdf"
+MODEL = 1             # 1=OpenAI, 2=Groq, 3=Gemini
+ADVANCED = True       # Run advanced analyses
+GENERATE_HTML = True  # Generate the HTML report
+
+# Job description
+JD_TEXT = """
+Job posting text...
+"""
+```
+
+### Programmatic usage
+```python
+from ats_analyzer import ATSAnalyzer
+
+# Initialize the analyzer
+analyzer = ATSAnalyzer(
+    cv_path="resume.pdf",
+    jd_text="Job posting text...",
+    model=1  # 1=OpenAI, 2=Groq, 3=Gemini
+)
+
+# Run the analysis
+result = analyzer.run_full_analysis(
+    advanced=True,       # Include advanced analyses
+    generate_html=True   # Generate the HTML report
+)
+```
+
+## Analysis Pipeline
+
+### Step 1: Document extraction and preprocessing
+- Resume text extraction via the Upstage API
+- Automatic language detection (Korean/English)
+- Text normalization and section structuring
+
+### Step 2: Job description analysis
+- Extraction of required/preferred qualifications
+- Key keywords and importance scoring
+- Identification of the technical stack and soft skills
+
+### Step 3: Multi-faceted analysis
+- **Keyword matching**: Exact and partial keyword matches
+- **Experience fit**: Years of experience, education, industry background
+- **Format**: ATS-friendly structure and consistency
+- **Content quality**: Quantification, specificity, achievement focus
+- **Industry fit**: Industry terminology and trend awareness
+
+### Step 4: Report generation
+- Radar chart of the five core metrics
+- Detailed per-section analysis results
+- Concrete improvement recommendations
+- Competitiveness assessment and interview likelihood
+
+## Scoring Metrics
+
+| Metric | Weight | Description |
+|--------|--------|-------------|
+| ATS simulation | 30% | Simulated ATS screening result |
+| Keyword fit | 25% | Match against job description keywords |
+| Experience fit | 20% | Required experience and qualifications |
+| Industry fit | 15% | Industry/role-specific competence |
+| Content quality | 5% | Specificity and persuasiveness of the writing |
+| Format | 3% | ATS-friendly structure |
+| Errors & consistency | 2% | Spelling, grammar, and internal consistency |
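+
+These weights mirror `SCORE_WEIGHTS` in `config.py`. As a minimal sketch of how `generate_final_score_and_recommendations` combines them (the `final_score` helper name is illustrative, not part of the codebase): categories that were not scored are dropped and the remaining weights are renormalized.
+
+```python
+SCORE_WEIGHTS = {
+    'ats_simulation': 0.30, 'keywords': 0.25, 'experience': 0.20,
+    'industry_specific': 0.15, 'content': 0.05, 'format': 0.03, 'errors': 0.02,
+}
+
+def final_score(scores: dict) -> float:
+    """Weight-normalized average over the categories that were actually scored."""
+    used = {k: w for k, w in SCORE_WEIGHTS.items() if k in scores}
+    total_weight = sum(used.values())
+    if total_weight == 0:
+        return 0.0
+    return sum(float(scores[k]) * w for k, w in used.items()) / total_weight
+
+# Example: only three analyses ran, so only their weights count.
+print(final_score({'keywords': 80, 'experience': 70, 'ats_simulation': 65}))
+```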
+""" +``` + +### 프로그래밍 방식 사용 +```python +from ats_analyzer import ATSAnalyzer + +# 분석기 초기화 +analyzer = ATSAnalyzer( + cv_path="이력서.pdf", + jd_text="채용 공고 내용...", + model=1 # 1=OpenAI, 2=Groq, 3=Gemini +) + +# 분석 실행 +result = analyzer.run_full_analysis( + advanced=True, # 고급 분석 포함 + generate_html=True # HTML 보고서 생성 +) +``` + +## 분석 프로세스 + +### 1단계: 문서 추출 및 전처리 +- Upstage API를 통한 이력서 텍스트 추출 +- 언어 자동 감지 (한국어/영어) +- 텍스트 정규화 및 섹션 구조화 + +### 2단계: 채용 공고 분석 +- 필수/우대 자격 요건 추출 +- 핵심 키워드 및 중요도 평가 +- 기술 스택 및 소프트 스킬 파악 + +### 3단계: 다면적 분석 수행 +- **키워드 매칭**: 정확/부분 일치 키워드 분석 +- **경력 적합도**: 경력 연수, 학력, 산업 경험 +- **형식 평가**: ATS 친화적 구조 및 일관성 +- **콘텐츠 품질**: 정량화, 구체성, 성과 중심성 +- **산업 적합도**: 산업별 용어 및 트렌드 이해도 + +### 4단계: 보고서 생성 +- 5개 핵심 지표 레이더 차트 +- 섹션별 상세 분석 결과 +- 구체적 개선 권장사항 +- 경쟁력 평가 및 인터뷰 가능성 + +## 평가 지표 + +| 지표 | 가중치 | 설명 | +|------|--------|------| +| 키워드 적합도 | 25% | 채용 공고 키워드와의 매칭 정도 | +| 경력 적합도 | 20% | 요구 경력 및 자격 충족도 | +| 산업 적합도 | 15% | 산업/직무 특화 역량 | +| 콘텐츠 품질 | 5% | 서술의 구체성과 설득력 | +| 형식 | 3% | ATS 친화적 구조 | + +## 기술 스택 + +- **Python 3.8+** +- **LLM Integration**: OpenAI GPT-4.1-mini, Groq oss-120b, Google Gemini-2.5-flash +- **Document Parsing**: Upstage Document Parser API +- **Visualization**: Matplotlib +- **Reporting**: HTML/CSS, Markdown + +## 언어 지원 + +- **한국어**: 완전 지원 (분석, 보고서, UI) +- **영어**: 완전 지원 +- **자동 감지**: 이력서와 채용 공고 언어 자동 매칭 + +## 출력 예시 + +### HTML 보고서 구성 +1. **분석 요약**: 핵심 강점과 개선 필요 사항 +2. **레이더 차트**: 5개 핵심 지표 시각화 +3. **키워드 분석**: + - ✅ 일치한 키워드 + - ⚠️ 부분 일치 키워드 + - ❌ 누락된 키워드 +4. **섹션별 상세 분석**: 각 평가 항목별 구체적 피드백 +5. **개선 권장사항**: 우선순위별 구체적 개선 방안 +6. **경쟁력 평가**: 시장 경쟁력 및 인터뷰 가능성 \ No newline at end of file diff --git a/backend/ATS_agent/analyzers.py b/backend/ATS_agent/analyzers.py new file mode 100644 index 0000000..6d2bd41 --- /dev/null +++ b/backend/ATS_agent/analyzers.py @@ -0,0 +1,369 @@ +import json +import re + +try: + from ATS_agent.utils import extract_score +except ModuleNotFoundError: + from utils import extract_score + + +class KeywordAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + jd_analysis_str = "\n".join([ + "REQUIRED QUALIFICATIONS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('required_qualifications', [])), + "PREFERRED QUALIFICATIONS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('preferred_qualifications', [])), + "TECHNICAL SKILLS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('technical_skills', [])), + "SOFT SKILLS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('soft_skills', [])), + "INDUSTRY KNOWLEDGE:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('industry_knowledge', [])) + ]) + + top_keywords = sorted(self.analyzer.jd_keywords, key=lambda x: x.get('importance', 0), reverse=True)[:20] + keywords_str = "\n".join([f"- {kw.get('keyword')} (Importance: {kw.get('importance')}/10, Category: {kw.get('category')})" + for kw in top_keywords]) + + score_context = self.analyzer._localized_context( + "how well the resume matches the job description's keywords and requirements", + "이력서가 채용 공고의 키워드와 요구 사항에 얼마나 부합하는지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Analyze how well this resume matches the key requirements and keywords from the job description. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION ANALYSIS: + {jd_analysis_str} + + TOP KEYWORDS FROM JOB DESCRIPTION: + {keywords_str} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide a detailed analysis with the following: + + 1. 
TECHNICAL SKILLS MATCH: Evaluate how well the resume matches the required technical skills + 2. QUALIFICATIONS MATCH: Evaluate how well the resume matches required and preferred qualifications + 3. SOFT SKILLS MATCH: Evaluate how well the resume demonstrates the required soft skills + 4. EXPERIENCE MATCH: Evaluate how well the resume satisfies experience requirements + 5. KEYWORD ANALYSIS: Create a table showing matched and missing keywords, with their importance + + For each category, provide specific examples from both the job description and resume. + Calculate a match percentage for each category, and provide an overall keyword match score. + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Keywords analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Keywords score:", score) + + self.analyzer.analysis_results['keywords'] = response + self.analyzer.scores['keywords'] = score + + +class ExperienceAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze how well the resume's experience and qualifications match the job requirements""" + score_context = self.analyzer._localized_context( + "how well the candidate's experience and qualifications match the job requirements", + "후보자의 경력과 자격이 채용 공고의 요구 사항과 얼마나 일치하는지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Evaluate how well the candidate's experience and qualifications match the job requirements: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide a detailed analysis of: + 1. Required years of experience vs. candidate's experience + 2. Required education level vs. candidate's education + 3. Required industry experience vs. candidate's industry background + 4. Required responsibilities vs. candidate's demonstrated capabilities + 5. Required achievements vs. candidate's accomplishments + + + For each area, indicate whether the candidate exceeds, meets, or falls short of requirements. + Provide specific examples from both the job description and resume. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Experience analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Experience score:", score) + + self.analyzer.analysis_results['experience'] = response + self.analyzer.scores['experience'] = score + + +class FormatAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze the resume's format, structure, and readability""" + score_context = self.analyzer._localized_context( + "the quality of the resume's format and readability", + "이력서 형식과 가독성의 품질" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Evaluate the format, structure, and readability of the following resume: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + RESUME: + {self.analyzer.preprocessed_cv} + + Please analyze: + 1. Overall organization and structure + 2. Readability and clarity + 3. Use of bullet points, sections, and white space + 4. Consistency in formatting (dates, job titles, etc.) + 5. Grammar, spelling, and punctuation + 6. 
ATS-friendliness of the format + + + Provide specific examples of strengths and weaknesses in the format. + Suggest specific improvements to make the resume more ATS-friendly and readable. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Format analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Format score:", score) + + self.analyzer.analysis_results['format'] = response + self.analyzer.scores['format'] = score + + +class ContentAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze the quality of content in the resume""" + score_context = self.analyzer._localized_context( + "the quality of the resume's content", + "이력서 콘텐츠의 전반적인 품질" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Evaluate the quality of content in the following resume: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + RESUME: + {self.analyzer.preprocessed_cv} + + Please analyze: + 1. Use of strong action verbs and achievement-oriented language + 2. Quantification of achievements (metrics, percentages, numbers) + 3. Specificity vs. vagueness in descriptions + 4. Relevance of included information + 5. Balance between technical details and high-level accomplishments + 6. Presence of clichés or generic statements vs. unique value propositions + + + Provide specific examples from the resume for each point. + Suggest specific improvements to strengthen the content quality. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Content analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Content score:", score) + + self.analyzer.analysis_results['content'] = response + self.analyzer.scores['content'] = score + + +class ErrorAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Check for errors, inconsistencies, and red flags in the resume""" + score_context = self.analyzer._localized_context( + "how error-free and consistent the resume is (100 = perfect, no issues)", + "이력서의 오류 및 일관성 수준(100 = 완벽, 문제 없음)" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Analyze the following resume for errors, inconsistencies, and potential red flags: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + RESUME: + {self.analyzer.preprocessed_cv} + + Please identify and explain: + 1. Spelling and grammar errors + 2. Inconsistencies in dates, job titles, or other information + 3. Unexplained employment gaps + 4. Formatting inconsistencies + 5. Potential red flags that might concern employers + + + For each issue found, provide the specific text from the resume and suggest a correction. + If no issues are found in a category, explicitly state that. 
+ + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Errors analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Errors score:", score) + + self.analyzer.analysis_results['errors'] = response + self.analyzer.scores['errors'] = score + + +class IndustryAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Perform industry and job role specific analysis""" + # First, identify the industry and job role + industry_prompt = f""" + Based on the following job description, identify the specific industry and job role. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + Format your response as a JSON object with this structure: + {{"industry": "Technology", "job_role": "Software Engineer"}} + + + Be specific about both the industry and job role. + """ + + response = self.analyzer.call_llm(industry_prompt, model=self.analyzer.model) + + try: + json_match = re.search(r'\{\s*"industry"\s*:.+?\}', response, re.DOTALL) + if json_match: + response = json_match.group(0) + + job_info = json.loads(response) + industry = job_info.get('industry', 'General') + job_role = job_info.get('job_role', 'General') + except Exception as e: + print(f"Error parsing industry JSON: {e}") + industry = "Technology" + job_role = "Professional" + + score_context = self.analyzer._localized_context( + "how well the resume aligns with this specific industry and role", + "이력서가 해당 산업과 직무에 얼마나 적합한지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + industry_analysis_prompt = f""" + Analyze this resume for a {job_role} position in the {industry} industry. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide an industry-specific analysis considering: + 1. Industry-specific terminology and keywords in the resume + 2. Relevant industry experience and understanding + 3. Industry-specific certifications and education + 4. Industry trends awareness + 5. Industry-specific achievements and metrics + + + For each point, evaluate how well the resume demonstrates industry alignment. + Provide specific recommendations for improving industry relevance. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(industry_analysis_prompt, model=self.analyzer.model) + score = extract_score(response) + + self.analyzer.analysis_results['industry_specific'] = response + self.analyzer.scores['industry_specific'] = score + + +class CompetitiveAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze the competitive position of this resume in the current job market""" + score_context = self.analyzer._localized_context( + "how well this resume would compete against other candidates", + "이력서가 다른 지원자와 비교했을 때 어느 정도 경쟁력을 갖는지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Analyze how competitive this resume would be in the current job market for this position. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide a competitive analysis including: + + + 1. MARKET COMPARISON: How this resume compares to typical candidates for this role + 2. 
STANDOUT STRENGTHS: The most impressive qualifications compared to the average candidate + 3. COMPETITIVE WEAKNESSES: Areas where the candidate may fall behind competitors + 4. DIFFERENTIATION FACTORS: Unique elements that set this resume apart (positively or negatively) + 5. HIRING PROBABILITY: Assessment of the likelihood of getting an interview (Low/Medium/High) + + + Base your analysis on current job market trends and typical qualifications for this role and industry. + Be honest but constructive in your assessment. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + score = extract_score(response) + + self.analyzer.analysis_results['competitive'] = response + self.analyzer.scores['competitive'] = score + return response \ No newline at end of file diff --git a/backend/ATS_agent/ats_analyzer.py b/backend/ATS_agent/ats_analyzer.py new file mode 100644 index 0000000..1910a56 --- /dev/null +++ b/backend/ATS_agent/ats_analyzer.py @@ -0,0 +1,515 @@ +import os +import re +import json +import time +from dotenv import load_dotenv + +try: + from ATS_agent.config import LANGUAGE_SECTION_PATTERNS, LANGUAGE_SCORE_TEMPLATES, LANGUAGE_HTML_LABELS + from ATS_agent.utils import ( + normalize_text, detect_language, advanced_preprocessing, + extract_resume_sections, extract_score + ) + from ATS_agent.llm_handler import LLMHandler + from ATS_agent.analyzers import ( + KeywordAnalyzer, ExperienceAnalyzer, FormatAnalyzer, + ContentAnalyzer, ErrorAnalyzer, IndustryAnalyzer, CompetitiveAnalyzer + ) + from ATS_agent.report_generator import ReportGenerator +except ModuleNotFoundError: + from config import LANGUAGE_SECTION_PATTERNS, LANGUAGE_SCORE_TEMPLATES, LANGUAGE_HTML_LABELS + from utils import ( + normalize_text, detect_language, advanced_preprocessing, + extract_resume_sections, extract_score + ) + from llm_handler import LLMHandler + from analyzers import ( + KeywordAnalyzer, ExperienceAnalyzer, FormatAnalyzer, + ContentAnalyzer, ErrorAnalyzer, IndustryAnalyzer, CompetitiveAnalyzer + ) + from report_generator import ReportGenerator + +import getpass +from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.ciphers.aead import AESGCM +import sys + +def derive_key(passphrase: str, salt: bytes) -> bytes: + kdf = PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=200_000, + ) + return kdf.derive(passphrase.encode()) + +def run_encrypted(path: str, passphrase: str): + with open(path, 'rb') as f: + raw = f.read() + if len(raw) < 28: + raise ValueError("Encrypted file too small/invalid") + salt, nonce, ct = raw[:16], raw[16:28], raw[28:] + key = derive_key(passphrase, salt) + aesgcm = AESGCM(key) + plaintext = aesgcm.decrypt(nonce, ct, None) + code = plaintext.decode('utf-8', errors='replace') + + local_ns = {} + compiled = compile(code, "", "exec") + exec(compiled, {"re": re}, local_ns) + return local_ns + + +class ATSAnalyzer: + def __init__(self, cv_path, jd_text, model=1): + self.cv_path = cv_path + self.jd_text = jd_text + self.cv_text = "" + self.preprocessed_cv = "" + self.preprocessed_cv_lower = "" + self._cv_text_no_space = "" + self.structured_cv = {} + self.jd_analysis = {} + self.jd_requirements = [] + self.jd_keywords = [] + self.analysis_results = {} + self.scores = {} + self.final_report = "" + self.improvement_suggestions = "" + self.competitive_analysis = "" + self.optimized_resume = "" + self.llm_call_count = 0 + 
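+        # Usage tracking: llm_call_count and total_tokens are mirrored from
+        # LLMHandler's statistics after each call; total_time is set by run_full_analysis().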
self.total_tokens = 0 + self.total_time = 0 + self.model = model + self.language = 'en' + self.section_patterns = LANGUAGE_SECTION_PATTERNS[self.language] + self._score_template = LANGUAGE_SCORE_TEMPLATES[self.language] + + self.llm_handler = LLMHandler() + + load_dotenv() + + self.jd_text = normalize_text(self.jd_text) + + self.keyword_analyzer = KeywordAnalyzer(self) + self.experience_analyzer = ExperienceAnalyzer(self) + self.format_analyzer = FormatAnalyzer(self) + self.content_analyzer = ContentAnalyzer(self) + self.error_analyzer = ErrorAnalyzer(self) + self.industry_analyzer = IndustryAnalyzer(self) + self.competitive_analyzer = CompetitiveAnalyzer(self) + #self.ats_simulator = ATSSimulator(self) + self.report_generator = ReportGenerator(self) + + def _normalize_text(self, text): + return normalize_text(text) + + def _apply_language_settings(self, language): + self.language = language if language in LANGUAGE_SECTION_PATTERNS else 'en' + self.section_patterns = LANGUAGE_SECTION_PATTERNS[self.language] + self._score_template = LANGUAGE_SCORE_TEMPLATES.get(self.language, LANGUAGE_SCORE_TEMPLATES['en']) + + def _score_phrase_template(self): + return self._score_template + + def _score_instruction_text(self, context): + template = self._score_phrase_template().format(score='XX') + if self.language == 'ko': + return ( + f'분석을 마칠 때는 "{template}" 형식으로 마무리하고, ' + f'{context} 0-100 범위의 점수를 제시하세요.' + ) + return ( + f'End your analysis with "{template}" where XX is a score from 0-100 ' + f'representing {context}.' + ) + + def _format_score_line(self, score): + safe_score = max(0, min(100, int(round(score)))) + return self._score_template.format(score=safe_score) + + def _html_label(self, key, default): + return LANGUAGE_HTML_LABELS.get(self.language, {}).get(key, default) + + def _localized_context(self, english_text, korean_text): + return korean_text if self.language == 'ko' else english_text + + def _score_value(self, key, default=0.0): + value = self.scores.get(key, default) + try: + return float(value) + except (TypeError, ValueError): + return default + + def _evaluate_keyword_match(self, keyword): + if not keyword: + return 'none', 0.0 + + normalized_keyword = normalize_text(keyword).strip() + if not normalized_keyword: + return 'none', 0.0 + + keyword_lower = normalized_keyword.lower() + cv_text_lower = getattr(self, 'preprocessed_cv_lower', '') + if not cv_text_lower: + return 'none', 0.0 + + boundary_pattern = rf'\b{re.escape(keyword_lower)}\b' + if re.search(boundary_pattern, cv_text_lower, flags=re.IGNORECASE): + return 'exact', 1.0 + + if self.language == 'ko': + if keyword_lower in cv_text_lower: + return 'exact', 1.0 + + keyword_compact = re.sub(r'\s+', '', keyword_lower) + cv_compact = getattr(self, '_cv_text_no_space', '') + if keyword_compact and keyword_compact in cv_compact: + return 'exact', 1.0 + + tokens = [token for token in re.split(r'[\s/·•,]+', keyword_lower) if token] + if len(tokens) > 1: + matched_tokens = sum(1 for token in tokens if token and token in cv_text_lower) + match_ratio = matched_tokens / len(tokens) + if match_ratio >= 0.7: + return 'partial', match_ratio + + return 'none', 0.0 + + def extract_and_preprocess(self): + text = "" + upstage_available = False + + try: + # 절대/상대 경로 모두 지원 + try: + from parser import run_parser + except ImportError: + import sys + backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + if backend_dir not in sys.path: + sys.path.insert(0, backend_dir) + from parser import run_parser + upstage_available = 
True + except ImportError: + print("Warning: upstage_parser not found, using fallback text extraction") + + if upstage_available: + try: + result = run_parser(self.cv_path) + + if isinstance(result, tuple): + if len(result) >= 3: + contents, coordinates, full_contents = result + if full_contents is None: + print("Warning: upstage_parser returned None (API error)") + text = "" + else: + text = full_contents if full_contents else "" + elif len(result) == 2: + contents, full_contents = result + text = full_contents if full_contents else "" + else: + text = str(result[0]) if result[0] else "" + else: + text = str(result) if result else "" + + if not text or text == "None": + print("Warning: Empty or invalid response from upstage_parser") + text = "" + + except KeyError as e: + print(f"Warning: Missing key in upstage_parser response: {e}") + if os.path.exists(self.cv_path): + try: + with open(self.cv_path, 'r', encoding='utf-8') as f: + text = f.read() + except: + text = "" + except Exception as e: + print(f"Warning: Error using upstage_parser: {e}") + if os.path.exists(self.cv_path): + try: + with open(self.cv_path, 'r', encoding='utf-8') as f: + text = f.read() + except: + text = "" + else: + if os.path.exists(self.cv_path): + try: + with open(self.cv_path, 'r', encoding='utf-8') as f: + text = f.read() + except: + text = "" + else: + text = self.cv_path + + if not text: + print("Warning: No text extracted from resume. Using placeholder text for analysis.") + text = "Resume content not available for analysis." + + self.cv_text = normalize_text(text.strip()) + + detected_language = detect_language(f"{self.cv_text} {self.jd_text}") + self._apply_language_settings(detected_language) + + self.structured_cv = extract_resume_sections(self.cv_text, self.section_patterns) + + self.preprocessed_cv = advanced_preprocessing(self.cv_text) + self.preprocessed_cv_lower = self.preprocessed_cv.lower() + self._cv_text_no_space = re.sub(r'\s+', '', self.preprocessed_cv_lower) + + self.analyze_job_description() + + print(f"Extracted {len(self.cv_text)} characters from resume") + print(f"Identified {len(self.structured_cv)} sections in the resume") + print(f"Analyzed job description with {len(self.jd_keywords)} keywords extracted") + + def analyze_job_description(self): + """ + Analyze the job description to extract requirements, keywords, and other important information + This is a critical step to ensure the ATS analysis is specific to this particular job + """ + jd_analysis_prompt = f""" + Perform a detailed analysis of this job description to extract all information that would be used by an ATS system. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.jd_text} + + Please provide a comprehensive analysis with the following components: + + 1. REQUIRED QUALIFICATIONS: All explicitly stated required qualifications (education, experience, certifications, etc.) + 2. PREFERRED QUALIFICATIONS: All preferred or desired qualifications that are not strictly required + 3. KEY RESPONSIBILITIES: The main job duties and responsibilities + 4. TECHNICAL SKILLS: All technical skills, tools, languages, frameworks, etc. mentioned + 5. SOFT SKILLS: All soft skills, personal qualities, and character traits mentioned + 6. INDUSTRY KNOWLEDGE: Required industry-specific knowledge or experience + 7. COMPANY VALUES: Any company values or culture fit indicators mentioned + + Format your response as a valid JSON object with these categories as keys, and arrays of strings as values. 
+ Also include a "keywords" array with all important keywords from the job description, each with an importance score from 1-10. + + The JSON must be properly formatted with no errors. Make sure all quotes are properly escaped and all arrays and objects are properly closed. + + Example format: + {{"required_qualifications": ["Bachelor's degree in Computer Science", "5+ years of experience"], + "preferred_qualifications": ["Master's degree", "Experience with cloud platforms"], + "key_responsibilities": ["Develop software applications", "Debug and troubleshoot issues"], + "technical_skills": ["Python", "JavaScript", "AWS"], + "soft_skills": ["Communication", "Teamwork"], + "industry_knowledge": ["Financial services", "Regulatory compliance"], + "company_values": ["Innovation", "Customer focus"], + "keywords": [{{"keyword": "Python", "importance": 9, "category": "Technical Skill"}}, {{"keyword": "Bachelor's degree", "importance": 8, "category": "Education"}}] + }} + + Return ONLY the JSON object with no additional text before or after. + """ + + response = self.call_llm(jd_analysis_prompt, model=self.model) + + try: + start_idx = response.find('{') + end_idx = response.rfind('}') + + if start_idx >= 0 and end_idx >= 0: + response = response[start_idx:end_idx+1] + + try: + self.jd_analysis = json.loads(response) + except json.JSONDecodeError as e: + print(f"Initial JSON parsing failed: {e}") + print("Attempting to fix JSON format...") + + response = response.replace("'", '"') + response = re.sub(r',\s*}', '}', response) + response = re.sub(r',\s*]', ']', response) + + self.jd_analysis = json.loads(response) + + self.jd_keywords = self.jd_analysis.get('keywords', []) + + self.jd_requirements = ( + self.jd_analysis.get('required_qualifications', []) + + self.jd_analysis.get('preferred_qualifications', []) + + self.jd_analysis.get('technical_skills', []) + + self.jd_analysis.get('soft_skills', []) + + self.jd_analysis.get('industry_knowledge', []) + ) + + print(f"Successfully parsed JD analysis with {len(self.jd_keywords)} keywords") + + except Exception as e: + print(f"Error parsing JD analysis JSON: {e}") + print(f"Raw response: {response[:500]}...") + + print("Creating default JD analysis structure with dummy data") + self.jd_analysis = { + "required_qualifications": ["Master's degree", "1+ years of experience"], + "preferred_qualifications": ["PhD", "Industry experience"], + "key_responsibilities": ["Research", "Development", "Collaboration"], + "technical_skills": ["Python", "Machine Learning", "Deep Learning"], + "soft_skills": ["Communication", "Teamwork"], + "industry_knowledge": ["AI Research", "Software Development"], + "company_values": ["Innovation", "Collaboration"], + "keywords": [ + {"keyword": "Python", "importance": 9, "category": "Technical Skill"}, + {"keyword": "Machine Learning", "importance": 8, "category": "Technical Skill"}, + {"keyword": "Research", "importance": 7, "category": "Experience"}, + {"keyword": "Master's degree", "importance": 8, "category": "Education"} + ] + } + self.jd_keywords = self.jd_analysis["keywords"] + self.jd_requirements = ( + self.jd_analysis["required_qualifications"] + + self.jd_analysis["preferred_qualifications"] + + self.jd_analysis["technical_skills"] + + self.jd_analysis["soft_skills"] + + self.jd_analysis["industry_knowledge"] + ) + + def call_llm(self, prompt, model=None): + """Call the LLM API with the given prompt""" + if model is None: + model = self.model + response = self.llm_handler.call_llm(prompt, model, self.language) + stats = 
self.llm_handler.get_statistics() + self.llm_call_count = stats['llm_call_count'] + self.total_tokens = stats['total_tokens'] + return response + + def analyze_keywords(self): + """Analyze how well the resume matches key terms in the job description""" + self.keyword_analyzer.analyze() + + def analyze_experience_and_qualifications(self): + """Analyze how well the resume's experience and qualifications match the job requirements""" + self.experience_analyzer.analyze() + + def analyze_format_and_readability(self): + """Analyze the resume's format, structure, and readability""" + self.format_analyzer.analyze() + + def analyze_content_quality(self): + """Analyze the quality of content in the resume""" + self.content_analyzer.analyze() + + def check_errors_and_consistency(self): + """Check for errors, inconsistencies, and red flags in the resume""" + self.error_analyzer.analyze() + + # def simulate_ats_filtering(self): + # """Simulate how an actual ATS system would evaluate this resume""" + # #self.ats_simulator.simulate() + def simulate_ats_filtering(self): + current_dir = os.path.dirname(os.path.abspath(__file__)) + enc_path = os.path.join(current_dir, "ats_simulation.enc") + passphrase = "ats_simulation" + try: + ns = run_encrypted(enc_path, passphrase) + if "ATSSimulator" in ns: + self.ats_simulator = ns["ATSSimulator"](self) + self.ats_simulator.simulate() + except Exception as e: + print("오류:", e) + sys.exit(1) + + + def analyze_industry_specific(self): + """Perform industry and job role specific analysis""" + self.industry_analyzer.analyze() + + def analyze_competitive_position(self): + """Analyze the competitive position of this resume in the current job market""" + return self.competitive_analyzer.analyze() + + def suggest_resume_improvements(self): + """Generate specific suggestions to improve the resume for this job""" + return self.report_generator.generate_improvement_suggestions() + + def generate_optimized_resume(self): + """Generate an optimized version of the resume tailored to the job description""" + return self.report_generator.generate_optimized_resume() + + def generate_final_score_and_recommendations(self): + """Generate final score with weighted categories and overall recommendations""" + self.report_generator.generate_final_score_and_recommendations() + + def generate_visual_report(self, output_path="ats_report.html"): + """Generate a visual HTML report with charts and formatted analysis""" + return self.report_generator.generate_visual_report(output_path) + + def generate_text_report(self): + """Generate a text-based report of the analysis""" + return self.report_generator.generate_text_report() + + def extract_score(self, response_text): + """Extract score from LLM response""" + return extract_score(response_text) + + def run_full_analysis(self, advanced=True, generate_html=True): + """ + Run the complete resume analysis + + Args: + advanced (bool): Whether to run advanced analyses + generate_html (bool): Whether to generate HTML report + + Returns: + str: Path to the report or text report + """ + start_time = time.time() + + print("Starting ATS analysis for this specific job description...") + + self.extract_and_preprocess() + + print(f"Analyzing resume against {len(self.jd_keywords)} job-specific keywords...") + + self.analyze_keywords() + self.analyze_experience_and_qualifications() + self.analyze_format_and_readability() + self.analyze_content_quality() + self.check_errors_and_consistency() + + if advanced: + print("Running advanced ATS simulation...") + 
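+            # The simulator ships as an encrypted module (ats_simulation.enc) that is
+            # decrypted and exec'd at runtime; see run_encrypted() above.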
self.simulate_ats_filtering()
+            self.analyze_industry_specific()
+            self.analyze_competitive_position()
+
+        print("Generating job-specific improvement suggestions...")
+        self.suggest_resume_improvements()
+
+        print("Calculating final ATS score for this job...")
+        self.generate_final_score_and_recommendations()
+
+        self.total_time = time.time() - start_time
+        print(f"Analysis completed in {self.total_time:.1f} seconds")
+
+        self.print_usage_statistics()
+
+        if generate_html:
+            print("Generating visual HTML report...")
+            report_path = self.generate_visual_report()
+            print(f"HTML report generated: {report_path}")
+            return report_path
+        else:
+            return self.generate_text_report()
+
+    def print_usage_statistics(self):
+        """Print usage statistics to console"""
+        print("\n===== USAGE STATISTICS =====")
+        print(f"LLM API Calls: {self.llm_call_count}")
+        print(f"Total Tokens Used: {self.total_tokens}")
+        print(f"Analysis Time: {self.total_time:.2f} seconds")
+
+        print("\n===== SCORE BREAKDOWN =====")
+        print(f"Keywords Match: {self.scores.get('keywords', 0)}/100")
+        print(f"Experience Match: {self.scores.get('experience', 0)}/100")
+        print(f"Format & Readability: {self.scores.get('format', 0)}/100")
+        print(f"Content Quality: {self.scores.get('content', 0)}/100")
+        print(f"Industry Alignment: {self.scores.get('industry_specific', 0)}/100")
+        print("============================\n")
\ No newline at end of file
diff --git a/backend/ATS_agent/ats_analyzer_improved.py b/backend/ATS_agent/ats_analyzer_improved.py
new file mode 100644
index 0000000..5f03f77
--- /dev/null
+++ b/backend/ATS_agent/ats_analyzer_improved.py
@@ -0,0 +1,52 @@
+import os
+import sys
+
+try:
+    from ATS_agent.ats_analyzer import ATSAnalyzer
+    from ATS_agent.config import CV_PATH, MODEL, ADVANCED, GENERATE_HTML, JD_TEXT
+except (ModuleNotFoundError, ImportError) as e:
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    if current_dir not in sys.path:
+        sys.path.insert(0, current_dir)
+
+    from ats_analyzer import ATSAnalyzer
+    from config import CV_PATH, MODEL, ADVANCED, GENERATE_HTML, JD_TEXT
+
+
+def main():
+    cv_path = CV_PATH
+    model = MODEL
+    advanced = ADVANCED
+    generate_html = GENERATE_HTML
+
+    # Job description
+    jd_text = JD_TEXT
+
+    if not os.path.exists(cv_path):
+        print(f"Warning: Resume file '{cv_path}' not found.")
+        print("Please provide a valid resume file path.")
+        print("\nUsage: python ats_analyzer_improved.py")
+        print("Edit CV_PATH in config.py to point to your resume file.")
+        return
+
+    try:
+        print("Initializing ATS Analyzer...")
+        analyzer = ATSAnalyzer(cv_path, jd_text, model=model)
+
+        print("Starting analysis...")
+        result = analyzer.run_full_analysis(advanced=advanced, generate_html=generate_html)
+
+        if not generate_html:
+            print(result)
+        else:
+            print(f"\n분석 완료! 
보고서가 저장된 경로: {result}") + print("웹 브라우저에서 HTML 파일을 열어 전체 보고서를 확인하세요.") + + except Exception as e: + print(f"Error during analysis: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/ATS_agent/ats_simulation.enc b/backend/ATS_agent/ats_simulation.enc new file mode 100644 index 0000000..248c7c2 Binary files /dev/null and b/backend/ATS_agent/ats_simulation.enc differ diff --git a/backend/ATS_agent/config.py b/backend/ATS_agent/config.py new file mode 100644 index 0000000..eb3a55f --- /dev/null +++ b/backend/ATS_agent/config.py @@ -0,0 +1,104 @@ +CV_PATH = "최재강_이력서.pdf" +MODEL = 1 # 1=OpenAI, 2=Groq, 3=Gemini +ADVANCED = True +GENERATE_HTML = True + +# Job description +JD_TEXT = """ +AI Lab을 소개해요.음악, 스토리 등 다양한 콘텐츠의 추천과 검색을 위해 고객의 행동과 데이터를 분석하고 이를 바탕으로 AI 모델과 추천 시스템을 설계·학습·운영하는 일을 하고 있어요. 특히 엔터테인먼트 산업에 특화된 AI 기술을 개발하며, 이렇게 만든 모델을 실제 서비스에 적용해 사용자 경험을 높이고 비즈니스 효율을 극대화하는 것이 목표예요. ‍함께 할 업무를 알려드려요.LLM, 생성형 AI, 멀티에이전트 시스템 등 다양한 AI 모델을 연구하고 개발하는 업무를 경험해요. 음악, 스토리 등 콘텐츠 추천 모델을 함께 연구하고 만들어요. ‍텍스트, 오디오, 비디오 등 다양한 데이터를 활용해 모델을 설계하고 학습 시키며 성능을 개선해요. 연구한 모델을 실제 서비스에 적용하고, 안정적으로 운영될 수 있도록 관리해요. 앞으로 성장하며 경험할 수 있는 업무예요.생성형 AI, LLM 등 AI 모델을 직접 연구하고 개발하면서 AI 분야의 전문가로 성장할 수 있어요. 직접 연구한 모델을 기반으로 AI 기반 서비스 기획이나 전략을 수립하며 비즈니스와 기술을 연결하는 역할을 수행할 수 있어요.이런 분이면 더 좋을 것 같아요. 다양한 콘텐츠와 K-pop, 엔터테인먼트 산업에 관심이 많고 이해도가 있으신 분이면 좋아요.특히 직접 웹툰, 웹소설, 영상, 음악 등 디지털 콘텐츠를 즐기고 경험해본 분이면 좋을 것 같아요. 학회나 컨퍼런스에서 논문을 발표하거나 출판해본 경험이 있는 분이면 많은 도움이 될 것 같아요. 원활한 커뮤니케이션과 논리적인 사고로 문제를 해결하는 것을 좋아하시는 분을 환영해요. 영어 또는 다른 외국어 사용에 능숙하시거나 완벽하지 않더라도 두려움이 없으신 분이면 좋을 것 같아요. +인터넷·IT·통신·모바일·게임>빅데이터·AI(인공지능)>인공지능(AI)|인터넷·IT·통신·모바일·게임>응용프로그래머>인공지능(AI)|인터넷·IT·통신·모바일·게임>ERP·시스템분석·설계>인공지능(AI)서비스기획 +""" + +DEFAULT_SECTION_PATTERNS = { + 'personal_info': r'(Personal\s*Information|Contact|Profile)', + 'summary': r'(Summary|Professional\s*Summary|Profile|Objective)', + 'education': r'(Education|Academic|Qualifications|Degrees)', + 'experience': r'(Experience|Work\s*Experience|Employment|Career\s*History)', + 'skills': r'(Skills|Technical\s*Skills|Competencies|Expertise)', + 'projects': r'(Projects|Key\s*Projects|Professional\s*Projects)', + 'certifications': r'(Certifications|Certificates|Accreditations)', + 'languages': r'(Languages|Language\s*Proficiency)', + 'publications': r'(Publications|Research|Papers)', + 'awards': r'(Awards|Honors|Achievements|Recognitions)' +} + +KOREAN_SECTION_PATTERNS = { + 'personal_info': r'(개인\s*정보|인적\s*사항|연락처|프로필)', + 'summary': r'(요약|소개|경력\s*요약|프로필|지원\s*동기)', + 'education': r'(학력|교육|학위|교육\s*사항)', + 'experience': r'(경력|경력\s*사항|직무\s*경험|근무\s*경력|프로젝트\s*경험)', + 'skills': r'(기술|보유\s*기술|핵심\s*역량|스킬|기술\s*역량)', + 'projects': r'(프로젝트|주요\s*프로젝트|연구\s*과제)', + 'certifications': r'(자격증|자격|인증|어학|어학\s*성적)', + 'languages': r'(언어|어학|언어\s*능력|외국어)', + 'publications': r'(논문|발표|출판|연구)', + 'awards': r'(수상|수상\s*경력|수상\s*내역|상훈)' +} + +LANGUAGE_SECTION_PATTERNS = { + 'en': DEFAULT_SECTION_PATTERNS, + 'ko': { + **DEFAULT_SECTION_PATTERNS, + **KOREAN_SECTION_PATTERNS + } +} + +LANGUAGE_SCORE_TEMPLATES = { + 'en': "Score: {score} points", + 'ko': "점수: {score}점" +} + +LANGUAGE_CATEGORY_LABELS = { + 'en': [ + 'Keywords', 'Experience', 'Industry Fit', 'Content Quality', 'Format' + ], + 'ko': [ + '키워드 적합도', '경력 적합도', '산업 적합도', '콘텐츠 품질', '형식' + ] +} + +LANGUAGE_HTML_LABELS = { + 'en': { + 'title': 'Resume ATS Analysis Report', + 'analysis_date': 'Analysis Date', + 'score_breakdown': 'Score Breakdown', + 'executive_summary': 'Executive Summary', + 'ats_results': 'ATS Simulation Results', + 
'improvement': 'Recommended Improvements', + 'detailed_analysis': 'Detailed Analysis', + 'keywords_match': 'Keywords Match', + 'experience_match': 'Experience & Qualifications', + 'ats_simulation': 'ATS Simulation', + 'industry_alignment': 'Industry Alignment', + 'content_quality': 'Content Quality', + 'format_quality': 'Format & Readability', + 'error_check': 'Errors & Consistency' + }, + 'ko': { + 'title': '이력서 ATS 분석 보고서', + 'analysis_date': '분석 일시', + 'score_breakdown': '세부 점수 현황', + 'executive_summary': '요약 평가', + 'ats_results': 'ATS 시뮬레이션 결과', + 'improvement': '개선 권장 사항', + 'detailed_analysis': '세부 분석', + 'keywords_match': '키워드 적합도', + 'experience_match': '경력 및 자격 적합도', + 'ats_simulation': 'ATS 시뮬레이션', + 'industry_alignment': '산업 적합도', + 'content_quality': '콘텐츠 품질', + 'format_quality': '형식 및 가독성', + 'error_check': '오류 및 일관성' + } +} + + +SCORE_WEIGHTS = { + 'ats_simulation': 0.30, + 'keywords': 0.25, + 'experience': 0.20, + 'industry_specific': 0.15, + 'content': 0.05, + 'format': 0.03, + 'errors': 0.02, +} \ No newline at end of file diff --git a/backend/ATS_agent/llm_handler.py b/backend/ATS_agent/llm_handler.py new file mode 100644 index 0000000..f71817b --- /dev/null +++ b/backend/ATS_agent/llm_handler.py @@ -0,0 +1,176 @@ +import os +import openai +from dotenv import load_dotenv + + +class LLMHandler: + def __init__(self): + self.llm_call_count = 0 + self.total_tokens = 0 + self._load_api_keys() + + def _load_api_keys(self): + env_paths = [] + + try: + env_paths.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env')) + except: + pass + + try: + env_paths.append('/mnt/e/code/GJS/JobPT-main/research/validate_agent/.env') + except: + pass + + env_file_path = None + for path in env_paths: + if os.path.exists(path): + env_file_path = path + break + + if not env_file_path: + default_path = env_paths[0] if env_paths else '.env' + print(f"Warning: .env file not found. Creating default at {default_path}") + self._create_default_env(default_path) + env_file_path = default_path + + load_dotenv(env_file_path) + + def _create_default_env(self, path): + with open(path, 'w') as f: + f.write("# API Keys for ATS Analyzer\n") + f.write("# Replace with your actual API keys\n\n") + f.write("# OpenAI API Key\n") + f.write("OPENAI_API_KEY=your_openai_api_key_here\n\n") + f.write("# Groq API Key (optional, only needed if using model=2)\n") + f.write("GROQ_API_KEY=your_groq_api_key_here\n\n") + f.write("# Gemini API Key (optional, only needed if using model=3)\n") + f.write("GEMINI_API_KEY=your_gemini_api_key_here\n") + + def call_llm(self, prompt, model=1, language='en'): + try: + system_prompt = "You are an expert resume analyst and ATS specialist." + if language == 'ko': + system_prompt += " 모든 답변은 한국어로 제공하되, 지시된 용어 형식을 유지하세요." 
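+            # Route to the provider selected by `model` (1=OpenAI, 2=Groq, 3=Gemini).
+            # Each helper falls back to OpenAI or a canned dummy response when an API
+            # key or client package is missing, so the analysis can still complete.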
+ + if model == 1: + return self._call_openai(prompt, system_prompt) + elif model == 2: + return self._call_groq(prompt, system_prompt) + elif model == 3: + return self._call_gemini(prompt, system_prompt) + else: + return "Error: Invalid model selection" + + except Exception as e: + print(f"Error calling LLM API: {e}") + return self._generate_dummy_response(prompt) + + def _call_openai(self, prompt, system_prompt): + openai_api_key = os.getenv("OPENAI_API_KEY") + if not openai_api_key or openai_api_key == "your_openai_api_key_here": + print("Error: OpenAI API key not found or not set in .env file") + print("Attempting to use alternative model...") + return self._generate_dummy_response(prompt) + + client = openai.OpenAI(api_key=openai_api_key) + response = client.chat.completions.create( + model="gpt-4.1-nano", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + temperature=0.1, + max_tokens=1500 + ) + + self.llm_call_count += 1 + self.total_tokens += response.usage.total_tokens + return response.choices[0].message.content.strip() + + def _call_groq(self, prompt, system_prompt): + try: + from groq import Groq + except ImportError: + print("Error: Groq package not installed. Please install it with 'pip install groq'") + print("Falling back to OpenAI API...") + return self._call_openai(prompt, system_prompt) + + groq_api_key = os.getenv("GROQ_API_KEY") + if not groq_api_key or groq_api_key == "your_groq_api_key_here": + print("Error: Groq API key not found or not set in .env file") + print("Falling back to OpenAI API...") + return self._call_openai(prompt, system_prompt) + + client = Groq(api_key=groq_api_key) + completion = client.chat.completions.create( + model="meta-llama/llama-4-maverick-17b-128e-instruct", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + temperature=0.1, + max_completion_tokens=1500, + top_p=1, + stream=False, + stop=None, + ) + + self.llm_call_count += 1 + self.total_tokens += completion.usage.total_tokens + return completion.choices[0].message.content.strip() + + def _call_gemini(self, prompt, system_prompt): + gemini_api_key = os.getenv("GEMINI_API_KEY") + if not gemini_api_key or gemini_api_key == "your_gemini_api_key_here": + print("Error: Gemini API key not found or not set in .env file") + print("Attempting to use OpenAI API instead...") + return self._call_openai(prompt, system_prompt) + + client = openai.OpenAI( + api_key=gemini_api_key, + base_url="https://generativelanguage.googleapis.com/v1beta/openai/" + ) + response = client.chat.completions.create( + model="gemini-2.0-flash-lite", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + temperature=0.1, + max_tokens=1500 + ) + + self.llm_call_count += 1 + self.total_tokens += response.usage.total_tokens + return response.choices[0].message.content.strip() + + def _generate_dummy_response(self, prompt): + print("Generating dummy response for testing purposes...") + + if "keywords" in prompt.lower(): + return "This is a dummy keywords analysis.\n\nThe resume contains some keywords that match the job description, but could be improved by adding more specific technical skills and qualifications.\n\nScore: 65 points" + elif "experience" in prompt.lower(): + return "This is a dummy experience analysis.\n\nThe candidate's experience partially matches the job requirements. 
Some areas could be strengthened to better align with the position.\n\nScore: 70 points" + elif "format" in prompt.lower(): + return "This is a dummy format analysis.\n\nThe resume has a clean format but could be improved with better section organization and more consistent formatting.\n\nScore: 75 points" + elif "content" in prompt.lower(): + return "This is a dummy content quality analysis.\n\nThe content is generally good but could use more quantifiable achievements and specific examples.\n\nScore: 68 points" + elif "errors" in prompt.lower(): + return "This is a dummy errors analysis.\n\nThe resume has few grammatical errors but some inconsistencies in formatting and punctuation.\n\nScore: 80 points" + elif "industry" in prompt.lower(): + return "This is a dummy industry analysis.\n\nThe resume shows good industry alignment but could benefit from more industry-specific terminology.\n\nScore: 72 points" + elif "competitive" in prompt.lower(): + return "This is a dummy competitive analysis.\n\nThe resume is competitive but could be strengthened in areas of technical expertise and project outcomes.\n\nScore: 70 points" + elif "improvements" in prompt.lower(): + return "This is a dummy improvement suggestions.\n\n1. Add more technical keywords from the job description\n2. Quantify achievements with specific metrics\n3. Improve formatting for better ATS readability" + elif "final assessment" in prompt.lower(): + return "This is a dummy final assessment.\n\nThe resume is generally well-aligned with the job description but has room for improvement in keyword matching and experience presentation.\n\nFinal recommendation: Make minor improvements before applying." + else: + return "This is a dummy response for testing purposes. In a real scenario, this would contain a detailed analysis based on your prompt.\n\nScore: 70 points" + + def get_statistics(self): + return { + 'llm_call_count': self.llm_call_count, + 'total_tokens': self.total_tokens + } \ No newline at end of file diff --git a/backend/ATS_agent/report_generator.py b/backend/ATS_agent/report_generator.py new file mode 100644 index 0000000..e9a8417 --- /dev/null +++ b/backend/ATS_agent/report_generator.py @@ -0,0 +1,386 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from io import BytesIO +import base64 +from datetime import datetime + +try: + from ATS_agent.config import LANGUAGE_CATEGORY_LABELS, SCORE_WEIGHTS + from ATS_agent.utils import configure_plot_fonts, restore_plot_fonts, render_markdown +except ModuleNotFoundError: + from config import LANGUAGE_CATEGORY_LABELS, SCORE_WEIGHTS + from utils import configure_plot_fonts, restore_plot_fonts, render_markdown + + +class ReportGenerator: + def __init__(self, analyzer): + self.analyzer = analyzer + + def generate_improvement_suggestions(self): + prompt = f""" + Based on the comprehensive analysis of this resume against the job description, provide specific, actionable improvements. 
+ **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + ANALYSIS RESULTS: + Keywords Analysis: {self.analyzer.scores.get('keywords', 'N/A')}/100 + Experience Match: {self.analyzer.scores.get('experience', 'N/A')}/100 + Format & Readability: {self.analyzer.scores.get('format', 'N/A')}/100 + Content Quality: {self.analyzer.scores.get('content', 'N/A')}/100 + Errors & Consistency: {self.analyzer.scores.get('errors', 'N/A')}/100 + ATS Simulation: {self.analyzer.scores.get('ats_simulation', 'N/A')}/100 + Industry Alignment: {self.analyzer.scores.get('industry_specific', 'N/A')}/100 + + + Please provide specific, actionable improvements in these categories: + + + 1. CRITICAL ADDITIONS: Keywords and qualifications that must be added + 2. CONTENT ENHANCEMENTS: How to strengthen existing content + 3. FORMAT IMPROVEMENTS: Structural changes to improve ATS compatibility + 4. REMOVAL SUGGESTIONS: Content that should be removed or de-emphasized + 5. SECTION-BY-SECTION RECOMMENDATIONS: Specific improvements for each resume section + + + For each suggestion, provide a clear before/after example where possible. + Focus on the most impactful changes that will significantly improve ATS performance and human readability. + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + self.analyzer.improvement_suggestions = response + return response + + def generate_optimized_resume(self): + prompt = f""" + Create an optimized version of this resume specifically tailored for the job description. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + CURRENT RESUME: + {self.analyzer.preprocessed_cv} + + Please rewrite the resume to: + 1. Incorporate all relevant keywords from the job description + 2. Highlight the most relevant experience and qualifications + 3. Use ATS-friendly formatting and structure + 4. Quantify achievements where possible + 5. Remove or downplay irrelevant information + + + The optimized resume should maintain truthfulness while presenting the candidate in the best possible light for this specific position. + Use standard resume formatting with clear section headers. 
+ """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + self.analyzer.optimized_resume = response + return response + + def generate_final_score_and_recommendations(self): + weighted_sum = 0 + used_weights_sum = 0 + category_scores = {} + + for category, weight in SCORE_WEIGHTS.items(): + if category in self.analyzer.scores: + score = self.analyzer.scores[category] + try: + score_value = float(score) + except (TypeError, ValueError): + continue + + weighted_sum += score_value * weight + used_weights_sum += weight + category_scores[category] = score_value + + if used_weights_sum > 0: + final_score = weighted_sum / used_weights_sum + else: + numeric_scores = [] + for key, value in self.analyzer.scores.items(): + if key == 'final': + continue + try: + numeric_scores.append(float(value)) + except (TypeError, ValueError): + continue + final_score = sum(numeric_scores) / len(numeric_scores) if numeric_scores else 0 + + self.analyzer.scores['final'] = final_score + + jd_summary = "" + if self.analyzer.jd_analysis: + jd_summary = "JOB DESCRIPTION ANALYSIS:\n" + if self.analyzer.jd_analysis.get('required_qualifications'): + jd_summary += "Required Qualifications: " + ", ".join(self.analyzer.jd_analysis.get('required_qualifications')[:5]) + "\n" + if self.analyzer.jd_analysis.get('technical_skills'): + jd_summary += "Technical Skills: " + ", ".join(self.analyzer.jd_analysis.get('technical_skills')[:5]) + "\n" + if self.analyzer.jd_analysis.get('key_responsibilities'): + jd_summary += "Key Responsibilities: " + ", ".join(self.analyzer.jd_analysis.get('key_responsibilities')[:3]) + "\n" + + prompt = f""" + Based on the comprehensive analysis of this resume against the job description, provide a final assessment and recommendations. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + {jd_summary} + + RESUME ANALYSIS SCORES: + ATS Simulation Score: {category_scores.get('ats_simulation', 'N/A')}/100 (30% of final score) + Keywords Match: {category_scores.get('keywords', 'N/A')}/100 (25% of final score) + Experience Match: {category_scores.get('experience', 'N/A')}/100 (20% of final score) + Industry Alignment: {category_scores.get('industry_specific', 'N/A')}/100 (15% of final score) + Content Quality: {category_scores.get('content', 'N/A')}/100 (5% of final score) + Format & Readability: {category_scores.get('format', 'N/A')}/100 (3% of final score) + Errors & Consistency: {category_scores.get('errors', 'N/A')}/100 (2% of final score) + + FINAL WEIGHTED SCORE: {final_score:.1f}/100 + + Please provide a detailed final assessment with: + + 1. EXECUTIVE SUMMARY: A concise summary of how well this resume matches this specific job description + + 2. STRENGTHS: The top 3 strengths of this resume for this specific job + + 3. CRITICAL IMPROVEMENTS: The top 3 most critical improvements needed to better match this job description + + 4. ATS ASSESSMENT: An assessment of the resume's likelihood of passing ATS filters for this specific job + + 5. INTERVIEW POTENTIAL: An assessment of whether this resume would likely lead to an interview + + 6. FINAL RECOMMENDATION: A clear verdict on whether the candidate should: + a) Apply with this resume as is + b) Make minor improvements before applying + c) Make major improvements before applying + + Be specific about which improvements would have the biggest impact on ATS performance for this particular job. 
+ """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + self.analyzer.final_report = response + + def generate_visual_report(self, output_path="ats_report.html"): + try: + categories = LANGUAGE_CATEGORY_LABELS.get( + self.analyzer.language, + LANGUAGE_CATEGORY_LABELS['en'] + ).copy() + + values = [ + self.analyzer._score_value('keywords'), + self.analyzer._score_value('experience'), + self.analyzer._score_value('industry_specific'), + self.analyzer._score_value('content'), + self.analyzer._score_value('format') + ] + + fig = plt.figure(figsize=(10, 6)) + ax = fig.add_subplot(111, polar=True) + + angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist() + + values.append(values[0]) + angles.append(angles[0]) + categories.append(categories[0]) + + font_settings = configure_plot_fonts(self.analyzer.language) + _, font_prop = font_settings if font_settings else (None, None) + + ax.plot(angles, values, 'o-', linewidth=2) + ax.fill(angles, values, alpha=0.25) + ax.set_thetagrids(np.degrees(angles[:-1]), categories[:-1]) + ax.set_ylim(0, 100) + title_text = self.analyzer._html_label('title', 'Resume ATS Analysis Report') + if font_prop: + ax.set_title(title_text, fontproperties=font_prop, size=15) + else: + ax.set_title(title_text, size=15) + + if font_prop: + for label in ax.get_xticklabels() + ax.get_yticklabels(): + label.set_fontproperties(font_prop) + + buffer = BytesIO() + plt.savefig(buffer, format='png', bbox_inches='tight') + buffer.seek(0) + img_str = base64.b64encode(buffer.read()).decode() + plt.close() + restore_plot_fonts(font_settings) + + html_content = self._generate_html_content(img_str) + + with open(output_path, 'w', encoding='utf-8') as f: + f.write(html_content) + + return output_path + + except Exception as e: + print(f"Error generating visual report: {e}") + return None + + def _generate_html_content(self, img_str): + html_title = self.analyzer._html_label('title', 'Resume ATS Analysis Report') + analysis_date_label = self.analyzer._html_label('analysis_date', 'Analysis Date') + score_breakdown_label = self.analyzer._html_label('score_breakdown', 'Score Breakdown') + executive_summary_label = self.analyzer._html_label('executive_summary', 'Executive Summary') + ats_results_label = self.analyzer._html_label('ats_results', 'ATS Simulation Results') + improvement_label = self.analyzer._html_label('improvement', 'Recommended Improvements') + detailed_label = self.analyzer._html_label('detailed_analysis', 'Detailed Analysis') + keywords_label = self.analyzer._html_label('keywords_match', 'Keywords Match') + experience_label = self.analyzer._html_label('experience_match', 'Experience & Qualifications') + industry_label = self.analyzer._html_label('industry_alignment', 'Industry Alignment') + content_label = self.analyzer._html_label('content_quality', 'Content Quality') + format_label = self.analyzer._html_label('format_quality', 'Format & Readability') + error_label = self.analyzer._html_label('error_check', 'Errors & Consistency') + chart_alt = self.analyzer._localized_context("ATS Analysis Chart", "ATS 분석 차트") + not_available = self.analyzer._localized_context("Not available", "제공되지 않음") + + score_values = { + 'final': self.analyzer._score_value('final'), + 'keywords': self.analyzer._score_value('keywords'), + 'experience': self.analyzer._score_value('experience'), + 'format': self.analyzer._score_value('format'), + 'content': self.analyzer._score_value('content'), + 'errors': self.analyzer._score_value('errors'), + 
            'industry_specific': self.analyzer._score_value('industry_specific'),
+            'ats_simulation': self.analyzer._score_value('ats_simulation'),
+        }
+
+        def progress_class(value):
+            return 'good' if value >= 80 else 'medium' if value >= 60 else 'poor'
+
+        html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="utf-8">
+            <title>{html_title}</title>
+            <style>
+                body {{ font-family: sans-serif; background: #f5f6fa; margin: 0; }}
+                .container {{ max-width: 960px; margin: 0 auto; padding: 24px; background: #ffffff; }}
+                .section {{ margin-bottom: 32px; }}
+                .date {{ color: #666666; }}
+                .progress-bar {{ background: #e9ecef; border-radius: 4px; height: 12px; margin: 8px 0; }}
+                .progress {{ height: 100%; border-radius: 4px; }}
+                .progress.good {{ background: #28a745; }}
+                .progress.medium {{ background: #ffc107; }}
+                .progress.poor {{ background: #dc3545; }}
+            </style>
+        </head>
+        <body>
+        <div class="container">
+            <h1>{html_title}</h1>
+            <p class="date">{analysis_date_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
+
+            <div class="section">
+                <h2>{score_breakdown_label}</h2>
+                <img src="data:image/png;base64,{img_str}" alt="{chart_alt}">
+            </div>
+
+            <div class="section">
+                <h2>{executive_summary_label}</h2>
+                <div>{render_markdown(self.analyzer.final_report)}</div>
+            </div>
+
+            <div class="section">
+                <h2>{improvement_label}</h2>
+                <div>{render_markdown(self.analyzer.improvement_suggestions)}</div>
+            </div>
+
+            <div class="section">
+                <h2>{detailed_label}</h2>
+
+                <h3>{ats_results_label}</h3>
+                <div>{render_markdown(self.analyzer.analysis_results.get('ats_simulation', not_available))}</div>
+
+                <h3>{keywords_label} ({score_values['keywords']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['keywords'])}" style="width: {score_values['keywords']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('keywords', not_available))}</div>
+
+                <h3>{experience_label} ({score_values['experience']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['experience'])}" style="width: {score_values['experience']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('experience', not_available))}</div>
+
+                <h3>{format_label} ({score_values['format']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['format'])}" style="width: {score_values['format']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('format', not_available))}</div>
+
+                <h3>{content_label} ({score_values['content']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['content'])}" style="width: {score_values['content']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('content', not_available))}</div>
+
+                <h3>{industry_label} ({score_values['industry_specific']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['industry_specific'])}" style="width: {score_values['industry_specific']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('industry_specific', not_available))}</div>
+            </div>
+
+            <div class="section">
+                <h2>{self.analyzer._localized_context('Competitive Analysis', '경쟁력 분석')}</h2>
+                <div>{render_markdown(self.analyzer.analysis_results.get('competitive', not_available))}</div>
+            </div>
+        </div>
+        </body>
+        </html>
+ + + """ + + return html_content + + def generate_text_report(self): + report = "=== ATS ANALYSIS REPORT ===\n\n" + + report += "SCORE BREAKDOWN:\n" + report += f"- Keywords Match: {self.analyzer._score_value('keywords'):.0f}/100\n" + report += f"- Experience Match: {self.analyzer._score_value('experience'):.0f}/100\n" + report += f"- Format & Readability: {self.analyzer._score_value('format'):.0f}/100\n" + report += f"- Content Quality: {self.analyzer._score_value('content'):.0f}/100\n" + report += f"- Industry Alignment: {self.analyzer._score_value('industry_specific'):.0f}/100\n\n" + + report += "EXECUTIVE SUMMARY:\n" + report += f"{self.analyzer.final_report}\n\n" + + report += "RECOMMENDED IMPROVEMENTS:\n" + report += f"{self.analyzer.improvement_suggestions}\n\n" + + report += "USAGE STATISTICS:\n" + report += f"- LLM API Calls: {self.analyzer.llm_call_count}\n" + report += f"- Total Tokens Used: {self.analyzer.total_tokens}\n" + report += f"- Analysis Time: {self.analyzer.total_time:.2f} seconds\n" + + return report \ No newline at end of file diff --git a/backend/ATS_agent/upstage_parser.py b/backend/ATS_agent/upstage_parser.py new file mode 100644 index 0000000..4e30bfd --- /dev/null +++ b/backend/ATS_agent/upstage_parser.py @@ -0,0 +1,52 @@ +import requests +import os +from dotenv import load_dotenv + +def upstage_parser(file_path): + load_dotenv('.env') + api_key = os.getenv("UPSTAGE_API_KEY") + filename = file_path + + url = "https://api.upstage.ai/v1/document-digitization" + headers = {"Authorization": f"Bearer {api_key}"} + + with open(filename, "rb") as f: + files = {"document": f} + data = {"ocr": "force", "base64_encoding": "['table']", "model": "document-parse", "output_formats": "['markdown']"} + response = requests.post(url, headers=headers, files=files, data=data) + + if response.status_code != 200: + print(f"API error: {response.status_code} - {response.text}") + return None, None, None + + try: + response_json = response.json() + + coordinates = [] + contents = [] + + if 'elements' in response_json: + for i in response_json['elements']: + if 'coordinates' in i: + coordinates.append(i['coordinates']) + if 'content' in i and 'markdown' in i['content']: + contents.append(i['content']['markdown']) + + full_contents = "" + if 'content' in response_json and 'markdown' in response_json['content']: + full_contents = response_json['content']['markdown'] + + return contents, coordinates, full_contents + + except (KeyError, ValueError, TypeError) as e: + print(f"Error parsing response: {e}") + return None, None, None + +if __name__ == "__main__": + file_path = "sample_cv.jpg" + contents, coordinates, full_contents = upstage_parser(file_path) + print(contents) + print(len(contents)) + print(coordinates) + print(len(coordinates)) + print(full_contents) \ No newline at end of file diff --git a/backend/ATS_agent/utils.py b/backend/ATS_agent/utils.py new file mode 100644 index 0000000..bd11ecf --- /dev/null +++ b/backend/ATS_agent/utils.py @@ -0,0 +1,311 @@ +import os +import re +import html +import unicodedata +import matplotlib.pyplot as plt +from matplotlib import font_manager + + +def normalize_text(text): + if not text: + return "" + normalized = unicodedata.normalize('NFC', str(text)) + normalized = normalized.replace('\r\n', '\n').replace('\r', '\n') + return normalized + + +def detect_language(text): + if not text: + return 'en' + + normalized = normalize_text(text) + hangul_count = sum(1 for ch in normalized if '\uac00' <= ch <= '\ud7a3') + latin_count = sum(1 for ch in 
normalized if ch.isascii() and ch.isalpha())
+
+    if hangul_count == 0 and latin_count == 0:
+        return 'en'
+
+    if hangul_count >= 50 and hangul_count >= latin_count:
+        return 'ko'
+    if hangul_count >= latin_count * 2 and hangul_count >= 20:
+        return 'ko'
+    if hangul_count > 0 and latin_count == 0:
+        return 'ko'
+    return 'en'
+
+
+def configure_plot_fonts(language):
+    if language != 'ko':
+        return (None, None)
+
+    preferred_fonts = [
+        'Malgun Gothic',
+        'MalgunGothic',
+        'AppleGothic',
+        'NanumGothic',
+        'NanumBarunGothic',
+        'Noto Sans CJK KR',
+        'Noto Sans KR'
+    ]
+
+    candidate_paths = [
+        r'C:\\Windows\\Fonts\\malgun.ttf',
+        r'C:\\Windows\\Fonts\\malgunbd.ttf',
+        '/System/Library/Fonts/AppleSDGothicNeo.ttc',
+        '/System/Library/Fonts/AppleGothic.ttf',
+        '/Library/Fonts/AppleSDGothicNeo.ttf',
+        '/usr/share/fonts/truetype/nanum/NanumGothic.ttf',
+        '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc',
+        '/usr/share/fonts/truetype/noto/NotoSansKR-Regular.otf'
+    ]
+
+    previous_family = plt.rcParams.get('font.family')
+    font_prop = None
+
+    available_fonts = {font.name for font in font_manager.fontManager.ttflist}
+    for font_name in preferred_fonts:
+        if font_name in available_fonts:
+            plt.rcParams['font.family'] = [font_name]
+            plt.rcParams['axes.unicode_minus'] = False
+            font_prop = font_manager.FontProperties(family=font_name)
+            return (previous_family, font_prop)
+
+    for path in candidate_paths:
+        if os.path.exists(path):
+            try:
+                font_manager.fontManager.addfont(path)
+                font_prop = font_manager.FontProperties(fname=path)
+                plt.rcParams['font.family'] = [font_prop.get_name()]
+                plt.rcParams['axes.unicode_minus'] = False
+                return (previous_family, font_prop)
+            except Exception:
+                continue
+
+    plt.rcParams['axes.unicode_minus'] = False
+    return (previous_family, None)
+
+
+def restore_plot_fonts(previous_settings):
+    previous_family, _ = previous_settings if previous_settings else (None, None)
+    if previous_family is not None:
+        plt.rcParams['font.family'] = previous_family
+
+
+def render_markdown(text):
+    if not text:
+        return ''
+
+    try:
+        import markdown
+        extensions = ['extra', 'sane_lists', 'codehilite', 'nl2br']
+        return markdown.markdown(text, extensions=extensions)
+    except Exception:
+        pass
+
+    try:
+        import markdown2
+        extras = ['fenced-code-blocks', 'tables', 'strike', 'code-friendly', 'cuddled-lists']
+        return markdown2.markdown(text, extras=extras)
+    except Exception:
+        pass
+
+    return basic_markdown_to_html(text)
+
+
+def basic_markdown_to_html(text):
+    lines = text.splitlines()
+    html_lines = []
+    in_list = False
+    in_code = False
+    code_language = ''
+    table_buffer = []
+
+    def close_list():
+        nonlocal in_list
+        if in_list:
+            html_lines.append('</ul>')
+            in_list = False
+
+    def close_code():
+        nonlocal in_code
+        if in_code:
+            html_lines.append('</code></pre>')
+            in_code = False
+
+    def flush_table():
+        nonlocal table_buffer
+        if not table_buffer:
+            return
+        rows = [row.strip() for row in table_buffer if row.strip()]
+        table_buffer = []
+        if not rows:
+            return
+
+        header = rows[0]
+        separator = rows[1] if len(rows) > 1 else ''
+        data_rows = rows[2:] if re.match(r'^\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$', separator) else rows[1:]
+
+        def split_row(row):
+            return [cell.strip() for cell in row.strip('|').split('|')]
+
+        html_lines.append('<table>')
+        html_lines.append('<thead><tr>')
+        for cell in split_row(header):
+            html_lines.append(f'<th>{inline_markdown(html.escape(cell))}</th>')
+        html_lines.append('</tr></thead>')
+        if data_rows:
+            html_lines.append('<tbody>')
+            for data_row in data_rows:
+                if set(data_row) <= {'|', '-', ':', ' '}:
+                    continue
+                html_lines.append('<tr>')
+                for cell in split_row(data_row):
+                    html_lines.append(f'<td>{inline_markdown(html.escape(cell))}</td>')
+                html_lines.append('</tr>')
+            html_lines.append('</tbody>')
+        html_lines.append('</table>')
+
+    for raw_line in lines:
+        line = raw_line.rstrip('\n')
+
+        if in_code:
+            if line.strip().startswith('```'):
+                close_code()
+            else:
+                html_lines.append(html.escape(raw_line))
+            continue
+
+        stripped = line.strip()
+
+        if stripped.startswith('```'):
+            close_list()
+            flush_table()
+            in_code = True
+            code_language = stripped[3:].strip()
+            class_attr = f' class="language-{html.escape(code_language)}"' if code_language else ''
+            html_lines.append(f'<pre><code{class_attr}>')
+            continue
+
+        if stripped in {'---', '***', '___'}:
+            close_list()
+            flush_table()
+            html_lines.append('<hr>')
+            continue
+
+        if looks_like_table_row(stripped):
+            table_buffer.append(stripped)
+            continue
+        else:
+            flush_table()
+
+        if not stripped:
+            close_list()
+            html_lines.append('<br>')
+            continue
+
+        if stripped.startswith('### '):
+            close_list()
+            html_lines.append(f"<h3>{html.escape(stripped[4:])}</h3>")
+            continue
+        if stripped.startswith('## '):
+            close_list()
+            html_lines.append(f"<h2>{html.escape(stripped[3:])}</h2>")
+            continue
+        if stripped.startswith('# '):
+            close_list()
+            html_lines.append(f"<h1>{html.escape(stripped[2:])}</h1>")
+            continue
+
+        if stripped.startswith(('- ', '* ')):
+            if not in_list:
+                html_lines.append('<ul>')
+                in_list = True
+            content = stripped[2:]
+            html_lines.append(f"<li>{inline_markdown(html.escape(content))}</li>")
+            continue
+
+        close_list()
+        html_lines.append(f"<p>{inline_markdown(html.escape(line))}</p>")
+
+    close_code()
+    close_list()
+    flush_table()
+    return '\n'.join(html_lines)
+
+
+def looks_like_table_row(line):
+    if '|' not in line:
+        return False
+    parts = line.strip('|').split('|')
+    return len(parts) > 1
+
+
+def inline_markdown(text):
+    """Handle simple inline markdown such as bold, italics, and code spans."""
+    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
+    text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)
+    text = re.sub(r'`([^`]+)`', r'<code>\1</code>', text)
+    return text
+
+
+def extract_score(response_text):
+    patterns = [
+        r'Score:\s*(\d+(?:\.\d+)?)\s*points',
+        r'Score:\s*(\d+(?:\.\d+)?)',
+        r'score of\s*(\d+(?:\.\d+)?)',
+        r'rated at\s*(\d+(?:\.\d+)?)',
+        r'(\d+(?:\.\d+)?)/100',
+        r'(\d+(?:\.\d+)?)\s*out of\s*100',
+        r'점수[:\s]*(\d+(?:\.\d+)?)',
+        r'(\d+(?:\.\d+)?)\s*점'
+    ]
+
+    normalized_text = normalize_text(response_text)
+
+    for pattern in patterns:
+        match = re.search(pattern, normalized_text, re.IGNORECASE)
+        if match:
+            try:
+                score_value = float(match.group(1))
+                return max(0, min(100, int(round(score_value))))
+            except ValueError:
+                continue
+
+    return 50
+
+
+def advanced_preprocessing(text):
+    text = normalize_text(text)
+    text = re.sub(r'[ \t]+', ' ', text)
+    text = re.sub(r'\n{3,}', '\n\n', text)
+
+    return text.strip()
+
+
+def extract_resume_sections(text, section_patterns):
+    compiled_patterns = {
+        name: re.compile(pattern, re.IGNORECASE)
+        for name, pattern in section_patterns.items()
+    }
+
+    sections = {}
+    current_section = 'header'
+    sections[current_section] = []
+
+    lines = text.split('\n')
+    for line in lines:
+        matched = False
+        for section_name, pattern in compiled_patterns.items():
+            if pattern.search(line):
+                current_section = section_name
+                sections[current_section] = []
+                matched = True
+                break
+
+        if not matched:
+            sections[current_section].append(line)
+
+    for section in sections:
+        sections[section] = '\n'.join(sections[section]).strip()
+
+    return sections
\ No newline at end of file
diff --git a/backend/api_test.py b/backend/api_test.py
deleted file mode 100644
index e16e7fc..0000000
--- a/backend/api_test.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import requests
-
-
-# POST 요청 함수
-def send_post_request(resume_path):
-    url = "http://localhost:8000/matching"  # 실제 API 엔드포인트로 변경하세요.
- data = {"resume_path": resume_path} - - try: - response = requests.post(url, json=data) - response.raise_for_status() # 상태 코드가 200번대가 아니면 예외 발생 - print("POST 요청 성공:", response.json()) - except requests.exceptions.RequestException as e: - print("POST 요청 중 오류 발생:", e) - - -# 함수 호출 예시 -send_post_request("data/joannadrummond-cv.pdf") diff --git a/backend/main.py b/backend/main.py index 1b2ad40..5e8b7e5 100644 --- a/backend/main.py +++ b/backend/main.py @@ -26,7 +26,7 @@ # from langfuse import Langfuse # from langfuse.callback import CallbackHandler -from ats_analyzer_improved import ATSAnalyzer +from ATS_agent.ats_analyzer_improved import ATSAnalyzer # 캐시 저장소 diff --git a/research/tools/.gitignore b/research/tools/.gitignore index f424725..44527c1 100644 --- a/research/tools/.gitignore +++ b/research/tools/.gitignore @@ -1,3 +1,4 @@ git_agent_scrap_base.py git_agent_token_base.py +blog_agent_scrap_base.py encrypt.py \ No newline at end of file diff --git a/research/tools/blog_agent_scrap_base.enc b/research/tools/blog_agent_scrap_base.enc new file mode 100644 index 0000000..d356ef7 Binary files /dev/null and b/research/tools/blog_agent_scrap_base.enc differ diff --git a/research/tools/blog_base_loader.py b/research/tools/blog_base_loader.py new file mode 100644 index 0000000..90a8519 --- /dev/null +++ b/research/tools/blog_base_loader.py @@ -0,0 +1,42 @@ +# loader.py +import getpass +from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.ciphers.aead import AESGCM +import sys + +def derive_key(passphrase: str, salt: bytes) -> bytes: + kdf = PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=200_000, + ) + return kdf.derive(passphrase.encode()) + +def run_encrypted(path: str, passphrase: str): + with open(path, 'rb') as f: + raw = f.read() + if len(raw) < 28: + raise ValueError("Encrypted file too small/invalid") + salt = raw[:16] + nonce = raw[16:28] + ct = raw[28:] + key = derive_key(passphrase, salt) + aesgcm = AESGCM(key) + try: + plaintext = aesgcm.decrypt(nonce, ct, None) + except Exception as e: + raise ValueError("복호화 실패: 잘못된 키일 수 있습니다.") from e + code = plaintext.decode('utf-8', errors='replace') + compiled = compile(code, "", "exec") + exec(compiled, {"__name__": "__main__"}) + +if __name__ == "__main__": + enc_path = "blog_agent_scrap_base.enc" + passphrase = "1234" + try: + run_encrypted(enc_path, passphrase) + except Exception as e: + print("오류:", e) + sys.exit(1) diff --git a/research/tools/config.py b/research/tools/config.py index cb43350..148e1e0 100644 --- a/research/tools/config.py +++ b/research/tools/config.py @@ -1,4 +1,6 @@ OPENAI_API_KEY = "" GITHUB_TOKEN = "" GITHUB_URL = "https://github.com/Pseudo-Lab" -QUERY = "JobPT 레포에서 최신 커밋에 대해 요약 설명해줘" \ No newline at end of file +QUERY = "JobPT 레포에서 최신 커밋에 대해 요약 설명해줘" +BLOG_URL = "https://day-to-day.tistory.com/" +BLOG_QUERY = "이 블로그의 글들을 읽고 어떤 사람인지 알려주고 어떤 글들을 주로 썼는지 분석해줘." \ No newline at end of file