From 66cf04ba80af8d45dc86d26e6165790d9d6e6eb8 Mon Sep 17 00:00:00 2001
From: MinwooKim1990
Date: Mon, 29 Sep 2025 16:02:47 +0900
Subject: [PATCH 1/2] Blog agent

---
 research/tools/.gitignore                |   1 +
 research/tools/blog_agent_scrap_base.enc | Bin 0 -> 24695 bytes
 research/tools/blog_base_loader.py       |  42 +++++++++++++++++++++++
 research/tools/config.py                 |   4 ++-
 4 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 research/tools/blog_agent_scrap_base.enc
 create mode 100644 research/tools/blog_base_loader.py

diff --git a/research/tools/.gitignore b/research/tools/.gitignore
index f424725..44527c1 100644
--- a/research/tools/.gitignore
+++ b/research/tools/.gitignore
@@ -1,3 +1,4 @@
 git_agent_scrap_base.py
 git_agent_token_base.py
+blog_agent_scrap_base.py
 encrypt.py
\ No newline at end of file
diff --git a/research/tools/blog_agent_scrap_base.enc b/research/tools/blog_agent_scrap_base.enc
new file mode 100644
index 0000000000000000000000000000000000000000..d356ef72a41c36b93646872b2a86346b4ee8a74c
GIT binary patch
literal 24695
[... 24695 bytes of base85-encoded binary payload omitted ...]
zQ;YL%3`;=o=2*ZaY3=l%Vdw2jS~SSLXuKC@)BZu&-1lqSU*A49KG)%2bdyX_H%?~r zc)Ihxd3T+}--{VY*j6jvLaoM9+&5gn_UPA)B-N&2;3VKS;zYxOKm|GkZ+2FuN zC;^l``Qxw}W4L9`&P(F!0#XOrD|0+IG{r3PjQqX3xN)~8;xroHyicZi8vc%eRuWty z!*9>$xX@|TZ;|j`tFgPD*Xx%a0I0WBgn$4kn@K_V?%^6MEU-d!rdC)qvoEI80LwD_ zv8x>w^f4Z|zx(OAlTQMK9~?JT7f+aaDs$5-x~ZzH)E0@JCi|4vByv>9WipmBE{5U< zER5s#5o8(gq1Nf0W_W=t(1a^?1jRA)?1o<#=AM>cNxV|s=-3teJmJ bytes: + kdf = PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=200_000, + ) + return kdf.derive(passphrase.encode()) + +def run_encrypted(path: str, passphrase: str): + with open(path, 'rb') as f: + raw = f.read() + if len(raw) < 28: + raise ValueError("Encrypted file too small/invalid") + salt = raw[:16] + nonce = raw[16:28] + ct = raw[28:] + key = derive_key(passphrase, salt) + aesgcm = AESGCM(key) + try: + plaintext = aesgcm.decrypt(nonce, ct, None) + except Exception as e: + raise ValueError("복호화 실패: 잘못된 키일 수 있습니다.") from e + code = plaintext.decode('utf-8', errors='replace') + compiled = compile(code, "", "exec") + exec(compiled, {"__name__": "__main__"}) + +if __name__ == "__main__": + enc_path = "blog_agent_scrap_base.enc" + passphrase = "1234" + try: + run_encrypted(enc_path, passphrase) + except Exception as e: + print("오류:", e) + sys.exit(1) diff --git a/research/tools/config.py b/research/tools/config.py index cb43350..148e1e0 100644 --- a/research/tools/config.py +++ b/research/tools/config.py @@ -1,4 +1,6 @@ OPENAI_API_KEY = "" GITHUB_TOKEN = "" GITHUB_URL = "https://github.com/Pseudo-Lab" -QUERY = "JobPT 레포에서 최신 커밋에 대해 요약 설명해줘" \ No newline at end of file +QUERY = "JobPT 레포에서 최신 커밋에 대해 요약 설명해줘" +BLOG_URL = "https://day-to-day.tistory.com/" +BLOG_QUERY = "이 블로그의 글들을 읽고 어떤 사람인지 알려주고 어떤 글들을 주로 썼는지 분석해줘." \ No newline at end of file From 7f9c2ce25bd9adfe12d0c89e18319c1201629146 Mon Sep 17 00:00:00 2001 From: MinwooKim1990 Date: Tue, 30 Sep 2025 18:14:13 +0900 Subject: [PATCH 2/2] ATS agent merged to backend --- .gitignore | 5 +- backend/ATS_agent/README.md | 175 +++ backend/ATS_agent/analyzers.py | 369 ++++++ backend/ATS_agent/ats_analyzer.py | 515 ++++++++ backend/ATS_agent/ats_analyzer_improved.py | 52 + backend/ATS_agent/ats_simulation.enc | Bin 0 -> 9444 bytes backend/ATS_agent/config.py | 104 ++ backend/ATS_agent/llm_handler.py | 176 +++ backend/ATS_agent/report_generator.py | 386 ++++++ backend/ATS_agent/upstage_parser.py | 52 + backend/ATS_agent/utils.py | 311 +++++ backend/api_test.py | 18 - backend/ats_analyzer_improved.py | 1393 -------------------- backend/main.py | 2 +- 14 files changed, 2145 insertions(+), 1413 deletions(-) create mode 100644 backend/ATS_agent/README.md create mode 100644 backend/ATS_agent/analyzers.py create mode 100644 backend/ATS_agent/ats_analyzer.py create mode 100644 backend/ATS_agent/ats_analyzer_improved.py create mode 100644 backend/ATS_agent/ats_simulation.enc create mode 100644 backend/ATS_agent/config.py create mode 100644 backend/ATS_agent/llm_handler.py create mode 100644 backend/ATS_agent/report_generator.py create mode 100644 backend/ATS_agent/upstage_parser.py create mode 100644 backend/ATS_agent/utils.py delete mode 100644 backend/api_test.py delete mode 100644 backend/ats_analyzer_improved.py diff --git a/.gitignore b/.gitignore index 027915f..b22b964 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,7 @@ resume_JD_similarity/data/** **lib** **chroma_db** **bin** -ui/public/uploads/** \ No newline at end of file +ui/public/uploads/** +backend/test_api.py +backend/test_ats.py +backend/ats_report.html \ No newline at 
end of file diff --git a/backend/ATS_agent/README.md b/backend/ATS_agent/README.md new file mode 100644 index 0000000..efae911 --- /dev/null +++ b/backend/ATS_agent/README.md @@ -0,0 +1,175 @@ +# ATS Resume Analyzer + +고급 AI 기반 이력서 ATS(Applicant Tracking System) 분석 도구입니다. 이력서와 채용 공고를 비교 분석하여 ATS 통과 가능성을 평가하고 개선 방안을 제시합니다. + +## 주요 기능 + +### 핵심 분석 기능 +- **키워드 매칭 분석**: 채용 공고의 핵심 키워드와 이력서 매칭 정도 평가 +- **경력 적합도 분석**: 요구 경력 및 자격 요건 충족도 평가 +- **형식 및 가독성 분석**: ATS 친화적 형식 및 구조 평가 +- **콘텐츠 품질 분석**: 성과 중심 서술 및 정량화 수준 평가 +- **산업 적합도 분석**: 해당 산업/직무에 대한 이해도 및 적합성 평가 + +### 고급 기능 +- **다국어 지원**: 한국어/영어 자동 감지 및 분석 +- **멀티 LLM 지원**: OpenAI GPT-4, Groq, Google Gemini 선택 가능 +- **시각적 보고서**: 레이더 차트와 HTML 기반 상세 보고서 +- **맞춤형 개선 제안**: 채용 공고별 구체적 개선 방안 제시 + +## 파일 구조 + +``` +validate_agent/ +├── ats_analyzer_improved.py# 메인 실행 파일 +├── ats_analyzer.py # 핵심 ATS 분석기 클래스 +├── analyzers.py # 개별 분석 모듈들 +│ # - KeywordAnalyzer: 키워드 매칭 +│ # - ExperienceAnalyzer: 경력 분석 +│ # - FormatAnalyzer: 형식 분석 +│ # - ContentAnalyzer: 콘텐츠 품질 +│ # - ErrorAnalyzer: 오류 검사 +│ # - IndustryAnalyzer: 산업 적합도 +│ # - CompetitiveAnalyzer: 경쟁력 분석 +├── ats_simulation.enc # ATS 키워드 시뮬레이션 +├── report_generator.py # HTML/텍스트 보고서 생성 +├── config.py # 설정 및 상수 +│ # - 언어별 패턴 및 템플릿 +│ # - 점수 가중치 설정 +├── utils.py # 유틸리티 함수 +│ # - 텍스트 정규화 및 언어 감지 +│ # - 마크다운 렌더링 +│ # - 폰트 설정 +├── llm_handler.py # LLM API 통합 관리 +│ # - OpenAI, Groq, Gemini 지원 +├── upstage_parser.py # 문서 파싱 (PDF/DOCX) +├── .env # API 키 설정 파일 +└── requirements.txt # 패키지 의존성 +``` + +## 설치 방법 + +### 1. 필수 패키지 설치 +```bash +pip install -r requirements.txt +``` + +### 2. API 키 설정 +`.env` 파일을 생성하고 다음 API 키를 설정합니다: + +```env +# OpenAI API (GPT-4) +OPENAI_API_KEY=your_openai_api_key_here + +# Groq API (선택사항) +GROQ_API_KEY=your_groq_api_key_here + +# Google Gemini API (선택사항) +GEMINI_API_KEY=your_gemini_api_key_here + +# Upstage Document Parser API +UPSTAGE_API_KEY=your_upstage_api_key_here +``` + +## 사용 방법 + +### 기본 실행 +```python +python ats_analyzer_improved.py +``` + +### 커스텀 설정 +`config.py`를 수정하여 설정을 변경할 수 있습니다: + +```python +# Configuration +CV_PATH = "이력서.pdf" +MODEL = 1 # 1=OpenAI, 2=Groq, 3=Gemini +ADVANCED = True # 고급 분석 수행 여부 +GENERATE_HTML = True # HTML 보고서 생성 여부 + +# Job description +JD_TEXT = """ +채용 공고 내용... 
+""" +``` + +### 프로그래밍 방식 사용 +```python +from ats_analyzer import ATSAnalyzer + +# 분석기 초기화 +analyzer = ATSAnalyzer( + cv_path="이력서.pdf", + jd_text="채용 공고 내용...", + model=1 # 1=OpenAI, 2=Groq, 3=Gemini +) + +# 분석 실행 +result = analyzer.run_full_analysis( + advanced=True, # 고급 분석 포함 + generate_html=True # HTML 보고서 생성 +) +``` + +## 분석 프로세스 + +### 1단계: 문서 추출 및 전처리 +- Upstage API를 통한 이력서 텍스트 추출 +- 언어 자동 감지 (한국어/영어) +- 텍스트 정규화 및 섹션 구조화 + +### 2단계: 채용 공고 분석 +- 필수/우대 자격 요건 추출 +- 핵심 키워드 및 중요도 평가 +- 기술 스택 및 소프트 스킬 파악 + +### 3단계: 다면적 분석 수행 +- **키워드 매칭**: 정확/부분 일치 키워드 분석 +- **경력 적합도**: 경력 연수, 학력, 산업 경험 +- **형식 평가**: ATS 친화적 구조 및 일관성 +- **콘텐츠 품질**: 정량화, 구체성, 성과 중심성 +- **산업 적합도**: 산업별 용어 및 트렌드 이해도 + +### 4단계: 보고서 생성 +- 5개 핵심 지표 레이더 차트 +- 섹션별 상세 분석 결과 +- 구체적 개선 권장사항 +- 경쟁력 평가 및 인터뷰 가능성 + +## 평가 지표 + +| 지표 | 가중치 | 설명 | +|------|--------|------| +| 키워드 적합도 | 25% | 채용 공고 키워드와의 매칭 정도 | +| 경력 적합도 | 20% | 요구 경력 및 자격 충족도 | +| 산업 적합도 | 15% | 산업/직무 특화 역량 | +| 콘텐츠 품질 | 5% | 서술의 구체성과 설득력 | +| 형식 | 3% | ATS 친화적 구조 | + +## 기술 스택 + +- **Python 3.8+** +- **LLM Integration**: OpenAI GPT-4.1-mini, Groq oss-120b, Google Gemini-2.5-flash +- **Document Parsing**: Upstage Document Parser API +- **Visualization**: Matplotlib +- **Reporting**: HTML/CSS, Markdown + +## 언어 지원 + +- **한국어**: 완전 지원 (분석, 보고서, UI) +- **영어**: 완전 지원 +- **자동 감지**: 이력서와 채용 공고 언어 자동 매칭 + +## 출력 예시 + +### HTML 보고서 구성 +1. **분석 요약**: 핵심 강점과 개선 필요 사항 +2. **레이더 차트**: 5개 핵심 지표 시각화 +3. **키워드 분석**: + - ✅ 일치한 키워드 + - ⚠️ 부분 일치 키워드 + - ❌ 누락된 키워드 +4. **섹션별 상세 분석**: 각 평가 항목별 구체적 피드백 +5. **개선 권장사항**: 우선순위별 구체적 개선 방안 +6. **경쟁력 평가**: 시장 경쟁력 및 인터뷰 가능성 \ No newline at end of file diff --git a/backend/ATS_agent/analyzers.py b/backend/ATS_agent/analyzers.py new file mode 100644 index 0000000..6d2bd41 --- /dev/null +++ b/backend/ATS_agent/analyzers.py @@ -0,0 +1,369 @@ +import json +import re + +try: + from ATS_agent.utils import extract_score +except ModuleNotFoundError: + from utils import extract_score + + +class KeywordAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + jd_analysis_str = "\n".join([ + "REQUIRED QUALIFICATIONS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('required_qualifications', [])), + "PREFERRED QUALIFICATIONS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('preferred_qualifications', [])), + "TECHNICAL SKILLS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('technical_skills', [])), + "SOFT SKILLS:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('soft_skills', [])), + "INDUSTRY KNOWLEDGE:\n- " + "\n- ".join(self.analyzer.jd_analysis.get('industry_knowledge', [])) + ]) + + top_keywords = sorted(self.analyzer.jd_keywords, key=lambda x: x.get('importance', 0), reverse=True)[:20] + keywords_str = "\n".join([f"- {kw.get('keyword')} (Importance: {kw.get('importance')}/10, Category: {kw.get('category')})" + for kw in top_keywords]) + + score_context = self.analyzer._localized_context( + "how well the resume matches the job description's keywords and requirements", + "이력서가 채용 공고의 키워드와 요구 사항에 얼마나 부합하는지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Analyze how well this resume matches the key requirements and keywords from the job description. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION ANALYSIS: + {jd_analysis_str} + + TOP KEYWORDS FROM JOB DESCRIPTION: + {keywords_str} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide a detailed analysis with the following: + + 1. 
TECHNICAL SKILLS MATCH: Evaluate how well the resume matches the required technical skills + 2. QUALIFICATIONS MATCH: Evaluate how well the resume matches required and preferred qualifications + 3. SOFT SKILLS MATCH: Evaluate how well the resume demonstrates the required soft skills + 4. EXPERIENCE MATCH: Evaluate how well the resume satisfies experience requirements + 5. KEYWORD ANALYSIS: Create a table showing matched and missing keywords, with their importance + + For each category, provide specific examples from both the job description and resume. + Calculate a match percentage for each category, and provide an overall keyword match score. + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Keywords analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Keywords score:", score) + + self.analyzer.analysis_results['keywords'] = response + self.analyzer.scores['keywords'] = score + + +class ExperienceAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze how well the resume's experience and qualifications match the job requirements""" + score_context = self.analyzer._localized_context( + "how well the candidate's experience and qualifications match the job requirements", + "후보자의 경력과 자격이 채용 공고의 요구 사항과 얼마나 일치하는지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Evaluate how well the candidate's experience and qualifications match the job requirements: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide a detailed analysis of: + 1. Required years of experience vs. candidate's experience + 2. Required education level vs. candidate's education + 3. Required industry experience vs. candidate's industry background + 4. Required responsibilities vs. candidate's demonstrated capabilities + 5. Required achievements vs. candidate's accomplishments + + + For each area, indicate whether the candidate exceeds, meets, or falls short of requirements. + Provide specific examples from both the job description and resume. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Experience analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Experience score:", score) + + self.analyzer.analysis_results['experience'] = response + self.analyzer.scores['experience'] = score + + +class FormatAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze the resume's format, structure, and readability""" + score_context = self.analyzer._localized_context( + "the quality of the resume's format and readability", + "이력서 형식과 가독성의 품질" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Evaluate the format, structure, and readability of the following resume: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + RESUME: + {self.analyzer.preprocessed_cv} + + Please analyze: + 1. Overall organization and structure + 2. Readability and clarity + 3. Use of bullet points, sections, and white space + 4. Consistency in formatting (dates, job titles, etc.) + 5. Grammar, spelling, and punctuation + 6. 
ATS-friendliness of the format + + + Provide specific examples of strengths and weaknesses in the format. + Suggest specific improvements to make the resume more ATS-friendly and readable. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Format analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Format score:", score) + + self.analyzer.analysis_results['format'] = response + self.analyzer.scores['format'] = score + + +class ContentAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze the quality of content in the resume""" + score_context = self.analyzer._localized_context( + "the quality of the resume's content", + "이력서 콘텐츠의 전반적인 품질" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Evaluate the quality of content in the following resume: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + RESUME: + {self.analyzer.preprocessed_cv} + + Please analyze: + 1. Use of strong action verbs and achievement-oriented language + 2. Quantification of achievements (metrics, percentages, numbers) + 3. Specificity vs. vagueness in descriptions + 4. Relevance of included information + 5. Balance between technical details and high-level accomplishments + 6. Presence of clichés or generic statements vs. unique value propositions + + + Provide specific examples from the resume for each point. + Suggest specific improvements to strengthen the content quality. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Content analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Content score:", score) + + self.analyzer.analysis_results['content'] = response + self.analyzer.scores['content'] = score + + +class ErrorAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Check for errors, inconsistencies, and red flags in the resume""" + score_context = self.analyzer._localized_context( + "how error-free and consistent the resume is (100 = perfect, no issues)", + "이력서의 오류 및 일관성 수준(100 = 완벽, 문제 없음)" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Analyze the following resume for errors, inconsistencies, and potential red flags: + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + RESUME: + {self.analyzer.preprocessed_cv} + + Please identify and explain: + 1. Spelling and grammar errors + 2. Inconsistencies in dates, job titles, or other information + 3. Unexplained employment gaps + 4. Formatting inconsistencies + 5. Potential red flags that might concern employers + + + For each issue found, provide the specific text from the resume and suggest a correction. + If no issues are found in a category, explicitly state that. 
+ + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + print("[DEBUG] Errors analysis LLM response:\n", response[:300], "...") + + score = extract_score(response) + print("[DEBUG] Errors score:", score) + + self.analyzer.analysis_results['errors'] = response + self.analyzer.scores['errors'] = score + + +class IndustryAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Perform industry and job role specific analysis""" + # First, identify the industry and job role + industry_prompt = f""" + Based on the following job description, identify the specific industry and job role. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + Format your response as a JSON object with this structure: + {{"industry": "Technology", "job_role": "Software Engineer"}} + + + Be specific about both the industry and job role. + """ + + response = self.analyzer.call_llm(industry_prompt, model=self.analyzer.model) + + try: + json_match = re.search(r'\{\s*"industry"\s*:.+?\}', response, re.DOTALL) + if json_match: + response = json_match.group(0) + + job_info = json.loads(response) + industry = job_info.get('industry', 'General') + job_role = job_info.get('job_role', 'General') + except Exception as e: + print(f"Error parsing industry JSON: {e}") + industry = "Technology" + job_role = "Professional" + + score_context = self.analyzer._localized_context( + "how well the resume aligns with this specific industry and role", + "이력서가 해당 산업과 직무에 얼마나 적합한지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + industry_analysis_prompt = f""" + Analyze this resume for a {job_role} position in the {industry} industry. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide an industry-specific analysis considering: + 1. Industry-specific terminology and keywords in the resume + 2. Relevant industry experience and understanding + 3. Industry-specific certifications and education + 4. Industry trends awareness + 5. Industry-specific achievements and metrics + + + For each point, evaluate how well the resume demonstrates industry alignment. + Provide specific recommendations for improving industry relevance. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(industry_analysis_prompt, model=self.analyzer.model) + score = extract_score(response) + + self.analyzer.analysis_results['industry_specific'] = response + self.analyzer.scores['industry_specific'] = score + + +class CompetitiveAnalyzer: + def __init__(self, analyzer): + self.analyzer = analyzer + + def analyze(self): + """Analyze the competitive position of this resume in the current job market""" + score_context = self.analyzer._localized_context( + "how well this resume would compete against other candidates", + "이력서가 다른 지원자와 비교했을 때 어느 정도 경쟁력을 갖는지" + ) + score_instruction = self.analyzer._score_instruction_text(score_context) + + prompt = f""" + Analyze how competitive this resume would be in the current job market for this position. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + Please provide a competitive analysis including: + + + 1. MARKET COMPARISON: How this resume compares to typical candidates for this role + 2. 
STANDOUT STRENGTHS: The most impressive qualifications compared to the average candidate + 3. COMPETITIVE WEAKNESSES: Areas where the candidate may fall behind competitors + 4. DIFFERENTIATION FACTORS: Unique elements that set this resume apart (positively or negatively) + 5. HIRING PROBABILITY: Assessment of the likelihood of getting an interview (Low/Medium/High) + + + Base your analysis on current job market trends and typical qualifications for this role and industry. + Be honest but constructive in your assessment. + + + {score_instruction} + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + score = extract_score(response) + + self.analyzer.analysis_results['competitive'] = response + self.analyzer.scores['competitive'] = score + return response \ No newline at end of file diff --git a/backend/ATS_agent/ats_analyzer.py b/backend/ATS_agent/ats_analyzer.py new file mode 100644 index 0000000..1910a56 --- /dev/null +++ b/backend/ATS_agent/ats_analyzer.py @@ -0,0 +1,515 @@ +import os +import re +import json +import time +from dotenv import load_dotenv + +try: + from ATS_agent.config import LANGUAGE_SECTION_PATTERNS, LANGUAGE_SCORE_TEMPLATES, LANGUAGE_HTML_LABELS + from ATS_agent.utils import ( + normalize_text, detect_language, advanced_preprocessing, + extract_resume_sections, extract_score + ) + from ATS_agent.llm_handler import LLMHandler + from ATS_agent.analyzers import ( + KeywordAnalyzer, ExperienceAnalyzer, FormatAnalyzer, + ContentAnalyzer, ErrorAnalyzer, IndustryAnalyzer, CompetitiveAnalyzer + ) + from ATS_agent.report_generator import ReportGenerator +except ModuleNotFoundError: + from config import LANGUAGE_SECTION_PATTERNS, LANGUAGE_SCORE_TEMPLATES, LANGUAGE_HTML_LABELS + from utils import ( + normalize_text, detect_language, advanced_preprocessing, + extract_resume_sections, extract_score + ) + from llm_handler import LLMHandler + from analyzers import ( + KeywordAnalyzer, ExperienceAnalyzer, FormatAnalyzer, + ContentAnalyzer, ErrorAnalyzer, IndustryAnalyzer, CompetitiveAnalyzer + ) + from report_generator import ReportGenerator + +import getpass +from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.ciphers.aead import AESGCM +import sys + +def derive_key(passphrase: str, salt: bytes) -> bytes: + kdf = PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + iterations=200_000, + ) + return kdf.derive(passphrase.encode()) + +def run_encrypted(path: str, passphrase: str): + with open(path, 'rb') as f: + raw = f.read() + if len(raw) < 28: + raise ValueError("Encrypted file too small/invalid") + salt, nonce, ct = raw[:16], raw[16:28], raw[28:] + key = derive_key(passphrase, salt) + aesgcm = AESGCM(key) + plaintext = aesgcm.decrypt(nonce, ct, None) + code = plaintext.decode('utf-8', errors='replace') + + local_ns = {} + compiled = compile(code, "", "exec") + exec(compiled, {"re": re}, local_ns) + return local_ns + + +class ATSAnalyzer: + def __init__(self, cv_path, jd_text, model=1): + self.cv_path = cv_path + self.jd_text = jd_text + self.cv_text = "" + self.preprocessed_cv = "" + self.preprocessed_cv_lower = "" + self._cv_text_no_space = "" + self.structured_cv = {} + self.jd_analysis = {} + self.jd_requirements = [] + self.jd_keywords = [] + self.analysis_results = {} + self.scores = {} + self.final_report = "" + self.improvement_suggestions = "" + self.competitive_analysis = "" + self.optimized_resume = "" + self.llm_call_count = 0 + 
self.total_tokens = 0 + self.total_time = 0 + self.model = model + self.language = 'en' + self.section_patterns = LANGUAGE_SECTION_PATTERNS[self.language] + self._score_template = LANGUAGE_SCORE_TEMPLATES[self.language] + + self.llm_handler = LLMHandler() + + load_dotenv() + + self.jd_text = normalize_text(self.jd_text) + + self.keyword_analyzer = KeywordAnalyzer(self) + self.experience_analyzer = ExperienceAnalyzer(self) + self.format_analyzer = FormatAnalyzer(self) + self.content_analyzer = ContentAnalyzer(self) + self.error_analyzer = ErrorAnalyzer(self) + self.industry_analyzer = IndustryAnalyzer(self) + self.competitive_analyzer = CompetitiveAnalyzer(self) + #self.ats_simulator = ATSSimulator(self) + self.report_generator = ReportGenerator(self) + + def _normalize_text(self, text): + return normalize_text(text) + + def _apply_language_settings(self, language): + self.language = language if language in LANGUAGE_SECTION_PATTERNS else 'en' + self.section_patterns = LANGUAGE_SECTION_PATTERNS[self.language] + self._score_template = LANGUAGE_SCORE_TEMPLATES.get(self.language, LANGUAGE_SCORE_TEMPLATES['en']) + + def _score_phrase_template(self): + return self._score_template + + def _score_instruction_text(self, context): + template = self._score_phrase_template().format(score='XX') + if self.language == 'ko': + return ( + f'분석을 마칠 때는 "{template}" 형식으로 마무리하고, ' + f'{context} 0-100 범위의 점수를 제시하세요.' + ) + return ( + f'End your analysis with "{template}" where XX is a score from 0-100 ' + f'representing {context}.' + ) + + def _format_score_line(self, score): + safe_score = max(0, min(100, int(round(score)))) + return self._score_template.format(score=safe_score) + + def _html_label(self, key, default): + return LANGUAGE_HTML_LABELS.get(self.language, {}).get(key, default) + + def _localized_context(self, english_text, korean_text): + return korean_text if self.language == 'ko' else english_text + + def _score_value(self, key, default=0.0): + value = self.scores.get(key, default) + try: + return float(value) + except (TypeError, ValueError): + return default + + def _evaluate_keyword_match(self, keyword): + if not keyword: + return 'none', 0.0 + + normalized_keyword = normalize_text(keyword).strip() + if not normalized_keyword: + return 'none', 0.0 + + keyword_lower = normalized_keyword.lower() + cv_text_lower = getattr(self, 'preprocessed_cv_lower', '') + if not cv_text_lower: + return 'none', 0.0 + + boundary_pattern = rf'\b{re.escape(keyword_lower)}\b' + if re.search(boundary_pattern, cv_text_lower, flags=re.IGNORECASE): + return 'exact', 1.0 + + if self.language == 'ko': + if keyword_lower in cv_text_lower: + return 'exact', 1.0 + + keyword_compact = re.sub(r'\s+', '', keyword_lower) + cv_compact = getattr(self, '_cv_text_no_space', '') + if keyword_compact and keyword_compact in cv_compact: + return 'exact', 1.0 + + tokens = [token for token in re.split(r'[\s/·•,]+', keyword_lower) if token] + if len(tokens) > 1: + matched_tokens = sum(1 for token in tokens if token and token in cv_text_lower) + match_ratio = matched_tokens / len(tokens) + if match_ratio >= 0.7: + return 'partial', match_ratio + + return 'none', 0.0 + + def extract_and_preprocess(self): + text = "" + upstage_available = False + + try: + # 절대/상대 경로 모두 지원 + try: + from parser import run_parser + except ImportError: + import sys + backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + if backend_dir not in sys.path: + sys.path.insert(0, backend_dir) + from parser import run_parser + upstage_available = 
True + except ImportError: + print("Warning: upstage_parser not found, using fallback text extraction") + + if upstage_available: + try: + result = run_parser(self.cv_path) + + if isinstance(result, tuple): + if len(result) >= 3: + contents, coordinates, full_contents = result + if full_contents is None: + print("Warning: upstage_parser returned None (API error)") + text = "" + else: + text = full_contents if full_contents else "" + elif len(result) == 2: + contents, full_contents = result + text = full_contents if full_contents else "" + else: + text = str(result[0]) if result[0] else "" + else: + text = str(result) if result else "" + + if not text or text == "None": + print("Warning: Empty or invalid response from upstage_parser") + text = "" + + except KeyError as e: + print(f"Warning: Missing key in upstage_parser response: {e}") + if os.path.exists(self.cv_path): + try: + with open(self.cv_path, 'r', encoding='utf-8') as f: + text = f.read() + except: + text = "" + except Exception as e: + print(f"Warning: Error using upstage_parser: {e}") + if os.path.exists(self.cv_path): + try: + with open(self.cv_path, 'r', encoding='utf-8') as f: + text = f.read() + except: + text = "" + else: + if os.path.exists(self.cv_path): + try: + with open(self.cv_path, 'r', encoding='utf-8') as f: + text = f.read() + except: + text = "" + else: + text = self.cv_path + + if not text: + print("Warning: No text extracted from resume. Using placeholder text for analysis.") + text = "Resume content not available for analysis." + + self.cv_text = normalize_text(text.strip()) + + detected_language = detect_language(f"{self.cv_text} {self.jd_text}") + self._apply_language_settings(detected_language) + + self.structured_cv = extract_resume_sections(self.cv_text, self.section_patterns) + + self.preprocessed_cv = advanced_preprocessing(self.cv_text) + self.preprocessed_cv_lower = self.preprocessed_cv.lower() + self._cv_text_no_space = re.sub(r'\s+', '', self.preprocessed_cv_lower) + + self.analyze_job_description() + + print(f"Extracted {len(self.cv_text)} characters from resume") + print(f"Identified {len(self.structured_cv)} sections in the resume") + print(f"Analyzed job description with {len(self.jd_keywords)} keywords extracted") + + def analyze_job_description(self): + """ + Analyze the job description to extract requirements, keywords, and other important information + This is a critical step to ensure the ATS analysis is specific to this particular job + """ + jd_analysis_prompt = f""" + Perform a detailed analysis of this job description to extract all information that would be used by an ATS system. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.jd_text} + + Please provide a comprehensive analysis with the following components: + + 1. REQUIRED QUALIFICATIONS: All explicitly stated required qualifications (education, experience, certifications, etc.) + 2. PREFERRED QUALIFICATIONS: All preferred or desired qualifications that are not strictly required + 3. KEY RESPONSIBILITIES: The main job duties and responsibilities + 4. TECHNICAL SKILLS: All technical skills, tools, languages, frameworks, etc. mentioned + 5. SOFT SKILLS: All soft skills, personal qualities, and character traits mentioned + 6. INDUSTRY KNOWLEDGE: Required industry-specific knowledge or experience + 7. COMPANY VALUES: Any company values or culture fit indicators mentioned + + Format your response as a valid JSON object with these categories as keys, and arrays of strings as values. 
+ Also include a "keywords" array with all important keywords from the job description, each with an importance score from 1-10. + + The JSON must be properly formatted with no errors. Make sure all quotes are properly escaped and all arrays and objects are properly closed. + + Example format: + {{"required_qualifications": ["Bachelor's degree in Computer Science", "5+ years of experience"], + "preferred_qualifications": ["Master's degree", "Experience with cloud platforms"], + "key_responsibilities": ["Develop software applications", "Debug and troubleshoot issues"], + "technical_skills": ["Python", "JavaScript", "AWS"], + "soft_skills": ["Communication", "Teamwork"], + "industry_knowledge": ["Financial services", "Regulatory compliance"], + "company_values": ["Innovation", "Customer focus"], + "keywords": [{{"keyword": "Python", "importance": 9, "category": "Technical Skill"}}, {{"keyword": "Bachelor's degree", "importance": 8, "category": "Education"}}] + }} + + Return ONLY the JSON object with no additional text before or after. + """ + + response = self.call_llm(jd_analysis_prompt, model=self.model) + + try: + start_idx = response.find('{') + end_idx = response.rfind('}') + + if start_idx >= 0 and end_idx >= 0: + response = response[start_idx:end_idx+1] + + try: + self.jd_analysis = json.loads(response) + except json.JSONDecodeError as e: + print(f"Initial JSON parsing failed: {e}") + print("Attempting to fix JSON format...") + + response = response.replace("'", '"') + response = re.sub(r',\s*}', '}', response) + response = re.sub(r',\s*]', ']', response) + + self.jd_analysis = json.loads(response) + + self.jd_keywords = self.jd_analysis.get('keywords', []) + + self.jd_requirements = ( + self.jd_analysis.get('required_qualifications', []) + + self.jd_analysis.get('preferred_qualifications', []) + + self.jd_analysis.get('technical_skills', []) + + self.jd_analysis.get('soft_skills', []) + + self.jd_analysis.get('industry_knowledge', []) + ) + + print(f"Successfully parsed JD analysis with {len(self.jd_keywords)} keywords") + + except Exception as e: + print(f"Error parsing JD analysis JSON: {e}") + print(f"Raw response: {response[:500]}...") + + print("Creating default JD analysis structure with dummy data") + self.jd_analysis = { + "required_qualifications": ["Master's degree", "1+ years of experience"], + "preferred_qualifications": ["PhD", "Industry experience"], + "key_responsibilities": ["Research", "Development", "Collaboration"], + "technical_skills": ["Python", "Machine Learning", "Deep Learning"], + "soft_skills": ["Communication", "Teamwork"], + "industry_knowledge": ["AI Research", "Software Development"], + "company_values": ["Innovation", "Collaboration"], + "keywords": [ + {"keyword": "Python", "importance": 9, "category": "Technical Skill"}, + {"keyword": "Machine Learning", "importance": 8, "category": "Technical Skill"}, + {"keyword": "Research", "importance": 7, "category": "Experience"}, + {"keyword": "Master's degree", "importance": 8, "category": "Education"} + ] + } + self.jd_keywords = self.jd_analysis["keywords"] + self.jd_requirements = ( + self.jd_analysis["required_qualifications"] + + self.jd_analysis["preferred_qualifications"] + + self.jd_analysis["technical_skills"] + + self.jd_analysis["soft_skills"] + + self.jd_analysis["industry_knowledge"] + ) + + def call_llm(self, prompt, model=None): + """Call the LLM API with the given prompt""" + if model is None: + model = self.model + response = self.llm_handler.call_llm(prompt, model, self.language) + stats = 
self.llm_handler.get_statistics() + self.llm_call_count = stats['llm_call_count'] + self.total_tokens = stats['total_tokens'] + return response + + def analyze_keywords(self): + """Analyze how well the resume matches key terms in the job description""" + self.keyword_analyzer.analyze() + + def analyze_experience_and_qualifications(self): + """Analyze how well the resume's experience and qualifications match the job requirements""" + self.experience_analyzer.analyze() + + def analyze_format_and_readability(self): + """Analyze the resume's format, structure, and readability""" + self.format_analyzer.analyze() + + def analyze_content_quality(self): + """Analyze the quality of content in the resume""" + self.content_analyzer.analyze() + + def check_errors_and_consistency(self): + """Check for errors, inconsistencies, and red flags in the resume""" + self.error_analyzer.analyze() + + # def simulate_ats_filtering(self): + # """Simulate how an actual ATS system would evaluate this resume""" + # #self.ats_simulator.simulate() + def simulate_ats_filtering(self): + current_dir = os.path.dirname(os.path.abspath(__file__)) + enc_path = os.path.join(current_dir, "ats_simulation.enc") + passphrase = "ats_simulation" + try: + ns = run_encrypted(enc_path, passphrase) + if "ATSSimulator" in ns: + self.ats_simulator = ns["ATSSimulator"](self) + self.ats_simulator.simulate() + except Exception as e: + print("오류:", e) + sys.exit(1) + + + def analyze_industry_specific(self): + """Perform industry and job role specific analysis""" + self.industry_analyzer.analyze() + + def analyze_competitive_position(self): + """Analyze the competitive position of this resume in the current job market""" + return self.competitive_analyzer.analyze() + + def suggest_resume_improvements(self): + """Generate specific suggestions to improve the resume for this job""" + return self.report_generator.generate_improvement_suggestions() + + def generate_optimized_resume(self): + """Generate an optimized version of the resume tailored to the job description""" + return self.report_generator.generate_optimized_resume() + + def generate_final_score_and_recommendations(self): + """Generate final score with weighted categories and overall recommendations""" + self.report_generator.generate_final_score_and_recommendations() + + def generate_visual_report(self, output_path="ats_report.html"): + """Generate a visual HTML report with charts and formatted analysis""" + return self.report_generator.generate_visual_report(output_path) + + def generate_text_report(self): + """Generate a text-based report of the analysis""" + return self.report_generator.generate_text_report() + + def extract_score(self, response_text): + """Extract score from LLM response""" + return extract_score(response_text) + + def run_full_analysis(self, advanced=True, generate_html=True): + """ + Run the complete resume analysis + + Args: + advanced (bool): Whether to run advanced analyses + generate_html (bool): Whether to generate HTML report + + Returns: + str: Path to the report or text report + """ + start_time = time.time() + + print("Starting ATS analysis for this specific job description...") + + self.extract_and_preprocess() + + print(f"Analyzing resume against {len(self.jd_keywords)} job-specific keywords...") + + self.analyze_keywords() + self.analyze_experience_and_qualifications() + self.analyze_format_and_readability() + self.analyze_content_quality() + self.check_errors_and_consistency() + + if advanced: + print("Running advanced ATS simulation...") + 
self.simulate_ats_filtering() + self.analyze_industry_specific() + self.analyze_competitive_position() + + print("Generating job-specific improvement suggestions...") + self.suggest_resume_improvements() + + print("Calculating final ATS score for this job...") + self.generate_final_score_and_recommendations() + + self.total_time = time.time() - start_time + print(f"Analysis completed in {self.total_time:.1f} seconds") + + self.print_usage_statistics() + + if generate_html: + print("Generating visual HTML report...") + report_path = self.generate_visual_report() + print(f"HTML report generated: {report_path}") + return report_path + else: + return self.generate_text_report() + + def print_usage_statistics(self): + """Print usage statistics to console""" + print("\n===== USAGE STATISTICS =====") + print(f"LLM API Calls: {self.llm_call_count}") + print(f"Total Tokens Used: {self.total_tokens}") + print(f"Analysis Time: {self.total_time:.2f} seconds") + + print("\n===== SCORE BREAKDOWN =====") + print(f"Keywords Match: {self.scores.get('keywords', 0)}/100") + print(f"Experience Match: {self.scores.get('experience', 0)}/100") + print(f"Format & Readability: {self.scores.get('format', 0)}/100") + print(f"Content Quality: {self.scores.get('content', 0)}/100") + print(f"Industry Alignment: {self.scores.get('industry_specific', 0)}/100") + print("============================\n") \ No newline at end of file diff --git a/backend/ATS_agent/ats_analyzer_improved.py b/backend/ATS_agent/ats_analyzer_improved.py new file mode 100644 index 0000000..5f03f77 --- /dev/null +++ b/backend/ATS_agent/ats_analyzer_improved.py @@ -0,0 +1,52 @@ +import os +import sys + +try: + from ATS_agent.ats_analyzer import ATSAnalyzer + from ATS_agent.config import CV_PATH, MODEL, ADVANCED, GENERATE_HTML, JD_TEXT +except (ModuleNotFoundError, ImportError) as e: + current_dir = os.path.dirname(os.path.abspath(__file__)) + if current_dir not in sys.path: + sys.path.insert(0, current_dir) + + from ats_analyzer import ATSAnalyzer + from config import CV_PATH, MODEL, ADVANCED, GENERATE_HTML, JD_TEXT + + +def main(): + cv_path = CV_PATH + model = MODEL + advanced = ADVANCED + generate_html = GENERATE_HTML + + # Job description + jd_text = JD_TEXT + + if not os.path.exists(cv_path): + print(f"Warning: Resume file '{cv_path}' not found.") + print("Please provide a valid resume file path.") + print("\nUsage: python main.py") + print("Edit the cv_path variable in main.py to point to your resume file.") + return + + try: + print("Initializing ATS Analyzer...") + analyzer = ATSAnalyzer(cv_path, jd_text, model=model) + + print("Starting analysis...") + result = analyzer.run_full_analysis(advanced=advanced, generate_html=generate_html) + + if not generate_html: + print(result) + else: + print(f"\n분석 완료! 보고서가 저장된 경로: {result}") + print("웹 브라우저에서 HTML 파일을 열어 전체 보고서를 확인하세요.") + + except Exception as e: + print(f"Error during analysis: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/ATS_agent/ats_simulation.enc b/backend/ATS_agent/ats_simulation.enc new file mode 100644 index 0000000000000000000000000000000000000000..248c7c2dc596b7adc7f12fdf33ad483b68ba7fff GIT binary patch literal 9444 zcmV@ou`LgP%P0^vBG;3(6;`Rg#xAPelEA7H~%urY~?vW<%#zVMp z3t-)?eW6_$pQ1=J+@C%{=w-vGQD--b393JcOdRU;TE`}l|Nr|~Vsg0z8K8$hy;dd) zB&ej+l*3IlyiEp*$2`)h`4gCQ?1ICfo9_~&Y%Oa1?-T+BXA^|#sgt2nw!Y!|*#sZ3CH#!WU=%K$kMbcP$(>UEM zn~oeP;=iRmQPH0fmzea&Zx3=kT3{lp!LuIzQj)8q&TSwLSzb=ScgQq^p#JN`Vz!y! 
zC`ZvW68?^Q;R_4r-h4Jg;>AMn(5YO-JAM>2QQG zY*y4i!y*5wpEhKuQh*7QsXmp_MGVpC_fzXpGr^zs-Z@9C1f5y@0Xu$1Tkp*GmS#e4 z?Q9YBX>G0BF&vmt8rZqPYkYTHh7}UasfUUNYex!fgN*3i1AxgKL|Rb=WE`;L?u(Ji zYlRra~_n6f)P77qfY8=Z|oB6P#z1h9mRBP01 zLS^KNeF(?}vJ@nkP~nQ96w%X2D4_0hs>z{P3H)$Wed+fI#Pr==+3Hl|N18 z`$+|U2;%$(DLpVdtCY)Z;yw3AB6&LS0O^Kvm|y-DqB_-9qmSbeAV zA!&g~t@J&mgS|-Q_*i6%mHazV8Z1r#GwufP5QwlJLOnOLKpPVD@4wX{+QW|AMLi`Z zez1`*-=Yj#w61PmeJiV^p3T6k%Co*D-lRv_h%@E6cpeg~aBG0k`l?`Zt%#Zgv5W(J z0dTk+uh?9j4hpD#5ACgQrA9EO6c?~Qu#5c{RMye1hLp!$=mm{cjkO6rP2A8_*Gk5f z1z2nFd)wXrRPFUxz^Klx-6G?-p{vl+9{$NNm9~BJRZ%|!JOJ?(I*(N4`?&-6|0gtq zzC$`R;itDi3vUtBThtF>>c0KGI<>1(vGC7*|b<-G+P72^QUD;tOU>Z#Xqq1svp z^r*j=jW!Ji!{1I2xNwHyIi=@#|H-!PUvZ=_zI?$n)33Q0t!d(k;T;8noZkiq-$n>$ z+X!Yqp1}=DdaQFoOQAu6O910zXz$#$x@9bQJRShy-yQEW!hPCP9$xA^Ag)$1i)=Et z0=CFwN=dAEzR}UvvN{D?wh?&09TESpLwA+9!$HJ;^lOgKfTYiLu9PfPQ-g&_jE2MD zrfGHZ8N9gKvcAVT*#Y}J21A$bfSM)p6|j#>liahI*4iHYqu^7W>EB~}-_z5`m0Kt3 z`pGV{S<1d3nO5>bD-WT~OWWQOyTEeHvc+HnlcF2?um6qVQ$sf{*wHCa@M2nZHZ+?$Z+dS^YuI0#L1IFr4N@ z1hE15vBQ`eA1xt3%90b847YLWDk}q8j4XBgy~CYl8EW&zBKE>Yq_1q$#zo~5hKUC? z*k>7ah^2f0Z6w5!n-Z_4;?zR&Mj-t&ge}P#ishO~4zdt%1QY&M2Iw%egP=ALt|%to zzdgunz2KiEvb{N4fvf2-C7Zqubw#!=qgss2>x)yJkGW);^nKB&nQ?vPH#xE3dI6dM zZHd?6g>ZBc^SYkam&LKM6Dzvl6Fi-Ql+mCq=t<^s*>J~BnU7$QMaPV5e;Y%Zfqkkj zkdcJ`adw$$E)PW&E35yr%K098Z}EYO;=J>O^8+6v`$O(aW$us&;8kqkYx(VAJjVEh zBlq`F&={534?9B zG9)F66dHJQ6A@q!M*ykh(Z7!Ha3Q3zBWM15YGL!0B*fO?0EM*o5#-VXi~Z;L-jC(; zMG#43nIMROB_Re;uCWj=2PBD_R{p9;vL+WX?MEh)~h_59p z(uQE+Q?1Ycw zb{Yaj;kyH;O5wu|8sc?T5j5N}6DQjf`TJ5FGl|4d*hVal(OV;;}zctIv16y zN&UXxeyoiWiXqW&`hEP%=W7VZTj8UA0jxwmpW0iIXkwwV)#8Xwqsy`w+|n*AwY0+n zh=O%Fu{(^JYruOgkBXwtj*cn12FR~PJz@2meB!~N|Igj&+2@Vqi`3?I>;DG?5)HnX zGlJ{i)t+^Nags~J)sHDp>_C9S$}M_Hl7~ZnL%t^K(;*5hW;;CC4@)TXZ42SA^0~+9;)%nk^%OVCRO#?bY4wnS3WKest+gT4*=O>%fq^fb* z9wTD^{v-?X)iMq^)vd;P60*I}t|HDB6Psx0+c@Q^M3+B$58LcYnww@T_;zC#A5-}{ zxHH4Hqukh*HkG~I;6p-!cgK)X8qeo@G3A_8s_4b$gD8Ul5|=n*%-b=_xYhYPExnQP zm!%h|)uqs1lDX=IF{y#x`S&2C3&aS6%MpIEYA@@cBzP)H*Q3Grn>58A)RKu>+{;-o z6O^Q6v?A^@&kdR*;%p{TXgLnz!x5{pEntpAQJR-aLSBP32Zn}Q8+Tfq!=;j?o$Q05 zr*El{8FKErZK|cu7P0`-s30?D`S@R*Y_6tN4m`OL-@p`Q*XJ(G5(B|9Xs<3~Pm@lU z)<#DWi-JeLo0&)`I9Ey@Gm{a}Q4%uyozttzRmAR9Qd&58Jj@OdN#M4`(ZV#C>_l&CMyS}6 zPAs&u0pPmd>Aqu+Gue6v1d;Pvj2|8Xq^Yss0oak_LM;pwRiiT?-uA}@d}^8=C5E#8 z)fI}gmIIjpvnWa+Ly-a{M;`HBi{2JTJv(7q837rqj5xC-xBbeP!h_@2tUNyFJT_kXy`gPg z%*X)+wWMu1q3+1`$+uC`CiDUwM%7C_CWM4(Uk^~2=n>u?th%YbM23V6iSxRjpXS_5 zWgmx`40>dW7!0#1p-gC%3POYBcZUy40e5rlk1~d;TYQ}B#PU)aI{9;|spO`#>gUT@827oqcVv1ZsK(7G1(U@VI8fl&`$fRxLc%Qm76ZC|o zs>8dJl9a^_Ks{r8Vle{BIEOAGtu`TPBF(Ju>@k^wt+vmE5Fr(ARZ;VB&n9ny2IV9j zBWa>K!K2=I#-g^`GDa{ljBIDt4v7yZBwwO4(oAogZU;f4;*%2!L1=Jq4@9|r(-!{3 z+}N2GbzY9l;r!z*zceaBW_A$}^h85Wrwx6E;dlh=n@*$Cm(wXpOJ3d zL)RRpUV+5oe)lXG=hu1Z)%KKbOi3&9Ez&-WGPR8h^@GA)=_Weu?McSEtk*wnSG`3^ zdaAmehKXr!w=m&!87WbmQDWPp*Iv_vw>@!>8byR|#Ih&tB%U5&#j3)(9G{vcAdw<+ zXnqGG*K)UMS3OhgNhV?O%)S*Pv1|#4iH4u-& z$efYC22Rm5g759IzXFJxhEUx<(djb&_zf=F7dYT%t)v+hU2Y%|D<>_+9T2(b_JV-d zp=L-xn)FMHzguNa!b36XR&L+neW!yzd%Q1_tr`*=QD`}Y{CE{js7cb0K@IGR3SK&b9y4ru_^?1jH-g5;1U)N zMLmw~N+1eHE}@d|1EiNjFds*g;t1lIiCxWZ;#uE?!BFBK8IWTkO@%ILoB_ChM73HR zTgh(8VceZd|KumNBOhMO)s@hu36I_H`kHd$7f3YZ5;N!F)JhMC{#4+TqnO68q7Il> zd$m{|jFKVo`*vT_ws&fi%DRsd_STi36U?#9C(YNEH=hGn0viX75`FA1r5Yx*tZB7_ z#+#QHp~Xsy%O}+(CQByrX`EwI&7_0p#_gCG<{L+{+N_jWvNJ({4vKL{kH-8GWWm8x zAhW_rF<*@(i)srsVGk_E{F<5@wFtP^(l2auO7yky0So_~EJkd6gpdoER~AI9%Dlu6 zVpDzkmag+29GmnVO%3ShInAL>drPg(4^N#1eblnt0PM2gDeC*pIGU(G(fpmx`lj7( zl1%Jw!A#fpv8*4}Db4VzBeGw(;|abm8Qc5xB=@f%Z`)o!Wa`#xZMcJ>!DQgzx 
z6j13DSMaV$h*)7dv^NS=(<)mZ{%5lMRq<4e=%atg7n(p-xm^!w)ASf-e!~kZDU~=g6+c%v z*43|G0O&v97P3MnL=T6eBxN+(J;3oh0*P4z4TXf~mC!(`J(KE2iSwPp!mOmZZ={Fk z$wdE;x=u^^+Qcibgj5Cq+xKQ^m%u@}Fp=pXc)?UWZ^Cg#_`$#oQ3THtdl2Epp!FJv z*@>IxDeEc!xxBz+UeesV9#eo69HPj!F?&()0F7CT2;zF6azd;rtz+kk$w@v_C-tNx zo!B0x_+t(D=ZdvAPk=X4;_Sfz;cieRLLV5vXrq#Jcjz#DpicO&ZJb1fBuZI~@dt9# z*9x@4D3H$HU_wf!-IYrhdrAsv1zrU50%P>Fg~#-_6{5P@;be9CfV>)Z(!II(7_ByE zBcM5i6VVtH!?37kvM?_i(*lmDbJ3TaG0sbU6h^tNUHc>vB~yo0T_uW+`xal+W=rZw zlLgs&Ae^K#0laq^8W_2a#24D$Pts$Lk(;-@QLlt=amBSwistroI4q=kZlSfJX^{@C zj~eg3lL9)m`92Uz@FdM99f9Q zZtIbu>B^C3Y7YX&RVsH49A*!@>A<6-^&xhgEnXLrL+@9xx;H&m9AHlysi7ZoF-1 z_shIIN<$d*pI4-cRQZk=5Be6S+I|s`Tnxjqar6VT6N5YZ_SliwAPh4aFH6qHhJNvJ zpyJ`~bZ8)UV*0HoJZ z)kmhr`3z%?+$&R_3MCd=(cPx@1{yt>H{|eC{HO%(ycKad{)Zt0a1ewW6NreW9r1WmDlZSgg1l4zR9LadXY)x%!Qh) zi}ue^k!yEfZC_PwFYoz+Jd_wDrP;(D3+@XbxJtBmmiR%!I9CHKwD~biPUA>0?P)-^ z>fR>a#M!TNw+=fSwGvW1sY5oBQ0gH)i4c{5ml77YMoDs$7^{h&K@EXLq2=eh9l%+cFl`k zAwXrcrMIX~!N9`{vX$VdE(4vnEkN9G-Bt4~E{7s>JR{WDeBoF6n?1rER`}!P5azbc zIyEc5;NV8Ow9Y#%;RkV5r1quZx@ZAPD4PDQs_uiwm6xRf@1tSIwsXg@kYTu&6Kr9# zYnBhEnSqn3Po=Bwu3*D3KAk-=icyu2>k(w$a4rwc=;t?D_&|*@dgwWl5R_p^yTzYAGhpf+3ll@!4y1sE%e|PlSU;Lf<@e=(F-?k0 z;|~WiYv#k7osdZXP|LuzFLby$vaL!;9F`F!k6-o;%1D_9m7DW+i1o$ zL8cGkaEfeE2lU)pBOaJ2wq86cllc}J@yWe_hLwfguIIdYYBRfDRS&{HS<1RvwGATEUHZEjSLFR)>WZK4Y9^H#l=TGpcki z-LUFi`JDbhL9rDw>1PXZUSWrYf1C=ycG9nj%*wvuSE72QL79kT`TOf+#HSa>QK9f@ zZQJ8-j!IOXVULj8b%q`&dVKyDB~~zivx-?Mo3ZP8$gUi|710LC$->URm#xQ~sPoV} zBh_z_G#T{|#pj6`7W}F34oXMQ zYD;Y7iSn!=d@|t^1fEDR74FIZInCP-4-}Eh@7uenQZtf&RRx9)#TyUj);ByJfW6<2 zOby(@NWXdE)}vrnim|p)mC`pCjaay>L0CfB2=GXhz_n5;fIJ_+CCW<_Y(fN$WE24L08|Ba6IYyGMOuy>uJ}L*10C zmqg%LncmBO2rMS;Q=h%+ye`Y_(fEmAAW#cH7F-jcKn{*V9gI;c%j==xf3R%hw2ab! zDr?nWN(CKrpjZ$s#Gp0sw^Jhm{WNSc$W+EKmVHN@f;;dMbaC0R(fn7km@*--*x1z& zpP_l31{DS8xYq=w7N0eYgh0~N&WKnGD*@&n@(>*2C3f53yQ5 z5rk_CLmTb>PQbd3KP$`@*tS-}AtD(pog$aLxceoK%5(M{7Z#Vlg>on!Don({CU<;W z;JLmd{teoRg&vrLPeO)rq@!2Q@z7$gC&67EP*ScwneVNNpUJ8V^fMsoZDVsFAOZ27 zSxTNQ^pqYv-}mDVoWVS&uY*+{oiLT?Mp10$8=_PC55rRz0UPe_M&}8m;M+*Bj-lz@ zeusZ1`~lsZy7Xs*wG}9b5h{!oAB~A{+7G{zRR-&E;uCEHkY1M1MbIn=u^de_GQr?0 zE~~0Us_ z7To;!!|Rg>d;wi=4fRK4&pCRJDiJ@h_r)t}6rqY0<7Qb5!{5}f%9j_wW_g8jgYh+3X~&vw4sD8za zfaeG8;y-cfksVmm_}pz2@)0Vw+-Xu<=Ryj(rbXuKBlEW2NO4&kFiO8z*Fm3M>g7b#**}yw==?NnlRK;zGfCC(DrVW~lh~upq!U z3W;4c;^=62W&`10KHIaA9U_(NSv}H`;f2T_6%}cxo|?2M45tgxB5w$GO$$03&1KUp z7D4XwkSAMp?=vU9Q2luTXp9BCFpD+P|0YL1H!1q`)_L$enEe|MeU%oSGtn;5k4lt{H36u zI`FKqQfn>slqO58xb~aO5j=QOELtQT!;?{trZh$aKh_zhD{y`<7xHd}Mi~77iLpjb zYgphK+0tQO3T7N!7va={nzCmA6v0F7NK%!n05zlySHOOJU4H#kM$OWhN?qQVhPvJHqDK*gK|;VXcxRg z)brbyd7l<#w@Ye0lUL4@b^L<>}dgjvpF zKl3}^ZMX)JJu7vBhczI!P~w<~y3|~ci9d;tTiYgB7E)Etrx_R3j$12yqpY@>0x<|p zw7AGeeh75l6tGzc(B`uM{NtD5>jvLCgtMN5mAPAeDEZ6lm#g;6-f=|58vOoakRW}U zwYJNks;R!9&Mwsjc5x-0k5>+%$478FZ~9T%dQq|$x5}G8J3~54~lTJ)UDN zCWcl&A)6%mTWH^A7zv#2Xp~E<{AQsncT`E6Cfxsl(gD8s--XZiz;u6nY2UKr7?2t+ z9yGmP?v{hoYF8UkeXsGYZZLbO0M7+SKTFz#Z=_;9XxVi}or(I%7RZ>VGGB5}suBbx z?Qjm*hPG?>d2}n9s-mx1>&wrroEkyPLNen<4sFMu-eJ?fww&MBdaS?^-X^Nx84*=l zXH+EH+;^5e3B)i!X`+hY58PvzCEEMU%TUvh&$h0r1teRZ3Pz|C%gn}ymC9Mh-h%o6 z|7#R4ObQSu3R&2KdS{3scUZIlfauJcyx+QvPPEn%UXdD<`sAkAL(qboOULR=TKzG9 zko&^8Gs9(5CbSN^PKwjeH^gxQ?uC36vbBTPG$99sPB1REgowikx7f^lW0aZd{V^^p zVIXhkt&n_wDzYqlsViylZO8f=9@iNT3Fh1YV%(9`&G?PPGEw)D#LIAjq@W3EvCn~i zFt7oi(j_jgoLX@!)M1KIy)=xy;iMv-$c=~`4?mMcce~TEowP=m%+J7%5fnF}6KC#B zPCovko6AI-J{6D%Yrjm?E#3cO&Cr+Q?Th}RNi3OM^mnrAvOcUjY|~)G`leP{qG(2j z0h#Wb(1~(?`<}~mrG*79Uiz&S^jc)t(mT@U=#VFQw`B-eM%7LljP8%|qzap<$=hN? 
zekcH+b+2kY`iNYUB|lt?w3E1tPK}q1h4Wf$66IB!%Upmq>l&WYjRcD1y-_a)P2?N<3qm+Zst!^y)uvrS<_(gmG% zUjOJ z%BEZs2w(F=+I4{|81(iJcp0C?nT0Q%9sV6yl!ZqU1(4LzSqVKGH|;g29hZ8KxTghyP*2u2$23xN8oMfHi0VA2r`^SCwJ5-|=ywCz2zN zn1Pxj48cfcVOPy?Llmy)pdAG$g`NI{F7%L00IfehXbAef>(CvEtbKkijaJ1QTr2Wk zB3;n-!Hu~uewEpq+No(V5MoBDXjX-nO|H?x7|OMp)+Qg1IUnYle1px q&8{C4Civq&ch7+LMsB+c<)tsbrBx)}KZUP1T8luDvM6oUCaQMC&3YsN literal 0 HcmV?d00001 diff --git a/backend/ATS_agent/config.py b/backend/ATS_agent/config.py new file mode 100644 index 0000000..eb3a55f --- /dev/null +++ b/backend/ATS_agent/config.py @@ -0,0 +1,104 @@ +CV_PATH = "최재강_이력서.pdf" +MODEL = 1 # 1=OpenAI, 2=Groq, 3=Gemini +ADVANCED = True +GENERATE_HTML = True + +# Job description +JD_TEXT = """ +AI Lab을 소개해요.음악, 스토리 등 다양한 콘텐츠의 추천과 검색을 위해 고객의 행동과 데이터를 분석하고 이를 바탕으로 AI 모델과 추천 시스템을 설계·학습·운영하는 일을 하고 있어요. 특히 엔터테인먼트 산업에 특화된 AI 기술을 개발하며, 이렇게 만든 모델을 실제 서비스에 적용해 사용자 경험을 높이고 비즈니스 효율을 극대화하는 것이 목표예요. ‍함께 할 업무를 알려드려요.LLM, 생성형 AI, 멀티에이전트 시스템 등 다양한 AI 모델을 연구하고 개발하는 업무를 경험해요. 음악, 스토리 등 콘텐츠 추천 모델을 함께 연구하고 만들어요. ‍텍스트, 오디오, 비디오 등 다양한 데이터를 활용해 모델을 설계하고 학습 시키며 성능을 개선해요. 연구한 모델을 실제 서비스에 적용하고, 안정적으로 운영될 수 있도록 관리해요. 앞으로 성장하며 경험할 수 있는 업무예요.생성형 AI, LLM 등 AI 모델을 직접 연구하고 개발하면서 AI 분야의 전문가로 성장할 수 있어요. 직접 연구한 모델을 기반으로 AI 기반 서비스 기획이나 전략을 수립하며 비즈니스와 기술을 연결하는 역할을 수행할 수 있어요.이런 분이면 더 좋을 것 같아요. 다양한 콘텐츠와 K-pop, 엔터테인먼트 산업에 관심이 많고 이해도가 있으신 분이면 좋아요.특히 직접 웹툰, 웹소설, 영상, 음악 등 디지털 콘텐츠를 즐기고 경험해본 분이면 좋을 것 같아요. 학회나 컨퍼런스에서 논문을 발표하거나 출판해본 경험이 있는 분이면 많은 도움이 될 것 같아요. 원활한 커뮤니케이션과 논리적인 사고로 문제를 해결하는 것을 좋아하시는 분을 환영해요. 영어 또는 다른 외국어 사용에 능숙하시거나 완벽하지 않더라도 두려움이 없으신 분이면 좋을 것 같아요. +인터넷·IT·통신·모바일·게임>빅데이터·AI(인공지능)>인공지능(AI)|인터넷·IT·통신·모바일·게임>응용프로그래머>인공지능(AI)|인터넷·IT·통신·모바일·게임>ERP·시스템분석·설계>인공지능(AI)서비스기획 +""" + +DEFAULT_SECTION_PATTERNS = { + 'personal_info': r'(Personal\s*Information|Contact|Profile)', + 'summary': r'(Summary|Professional\s*Summary|Profile|Objective)', + 'education': r'(Education|Academic|Qualifications|Degrees)', + 'experience': r'(Experience|Work\s*Experience|Employment|Career\s*History)', + 'skills': r'(Skills|Technical\s*Skills|Competencies|Expertise)', + 'projects': r'(Projects|Key\s*Projects|Professional\s*Projects)', + 'certifications': r'(Certifications|Certificates|Accreditations)', + 'languages': r'(Languages|Language\s*Proficiency)', + 'publications': r'(Publications|Research|Papers)', + 'awards': r'(Awards|Honors|Achievements|Recognitions)' +} + +KOREAN_SECTION_PATTERNS = { + 'personal_info': r'(개인\s*정보|인적\s*사항|연락처|프로필)', + 'summary': r'(요약|소개|경력\s*요약|프로필|지원\s*동기)', + 'education': r'(학력|교육|학위|교육\s*사항)', + 'experience': r'(경력|경력\s*사항|직무\s*경험|근무\s*경력|프로젝트\s*경험)', + 'skills': r'(기술|보유\s*기술|핵심\s*역량|스킬|기술\s*역량)', + 'projects': r'(프로젝트|주요\s*프로젝트|연구\s*과제)', + 'certifications': r'(자격증|자격|인증|어학|어학\s*성적)', + 'languages': r'(언어|어학|언어\s*능력|외국어)', + 'publications': r'(논문|발표|출판|연구)', + 'awards': r'(수상|수상\s*경력|수상\s*내역|상훈)' +} + +LANGUAGE_SECTION_PATTERNS = { + 'en': DEFAULT_SECTION_PATTERNS, + 'ko': { + **DEFAULT_SECTION_PATTERNS, + **KOREAN_SECTION_PATTERNS + } +} + +LANGUAGE_SCORE_TEMPLATES = { + 'en': "Score: {score} points", + 'ko': "점수: {score}점" +} + +LANGUAGE_CATEGORY_LABELS = { + 'en': [ + 'Keywords', 'Experience', 'Industry Fit', 'Content Quality', 'Format' + ], + 'ko': [ + '키워드 적합도', '경력 적합도', '산업 적합도', '콘텐츠 품질', '형식' + ] +} + +LANGUAGE_HTML_LABELS = { + 'en': { + 'title': 'Resume ATS Analysis Report', + 'analysis_date': 'Analysis Date', + 'score_breakdown': 'Score Breakdown', + 'executive_summary': 'Executive Summary', + 'ats_results': 'ATS Simulation Results', + 'improvement': 'Recommended Improvements', + 
'detailed_analysis': 'Detailed Analysis', + 'keywords_match': 'Keywords Match', + 'experience_match': 'Experience & Qualifications', + 'ats_simulation': 'ATS Simulation', + 'industry_alignment': 'Industry Alignment', + 'content_quality': 'Content Quality', + 'format_quality': 'Format & Readability', + 'error_check': 'Errors & Consistency' + }, + 'ko': { + 'title': '이력서 ATS 분석 보고서', + 'analysis_date': '분석 일시', + 'score_breakdown': '세부 점수 현황', + 'executive_summary': '요약 평가', + 'ats_results': 'ATS 시뮬레이션 결과', + 'improvement': '개선 권장 사항', + 'detailed_analysis': '세부 분석', + 'keywords_match': '키워드 적합도', + 'experience_match': '경력 및 자격 적합도', + 'ats_simulation': 'ATS 시뮬레이션', + 'industry_alignment': '산업 적합도', + 'content_quality': '콘텐츠 품질', + 'format_quality': '형식 및 가독성', + 'error_check': '오류 및 일관성' + } +} + + +SCORE_WEIGHTS = { + 'ats_simulation': 0.30, + 'keywords': 0.25, + 'experience': 0.20, + 'industry_specific': 0.15, + 'content': 0.05, + 'format': 0.03, + 'errors': 0.02, +} \ No newline at end of file diff --git a/backend/ATS_agent/llm_handler.py b/backend/ATS_agent/llm_handler.py new file mode 100644 index 0000000..f71817b --- /dev/null +++ b/backend/ATS_agent/llm_handler.py @@ -0,0 +1,176 @@ +import os +import openai +from dotenv import load_dotenv + + +class LLMHandler: + def __init__(self): + self.llm_call_count = 0 + self.total_tokens = 0 + self._load_api_keys() + + def _load_api_keys(self): + env_paths = [] + + try: + env_paths.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env')) + except: + pass + + try: + env_paths.append('/mnt/e/code/GJS/JobPT-main/research/validate_agent/.env') + except: + pass + + env_file_path = None + for path in env_paths: + if os.path.exists(path): + env_file_path = path + break + + if not env_file_path: + default_path = env_paths[0] if env_paths else '.env' + print(f"Warning: .env file not found. Creating default at {default_path}") + self._create_default_env(default_path) + env_file_path = default_path + + load_dotenv(env_file_path) + + def _create_default_env(self, path): + with open(path, 'w') as f: + f.write("# API Keys for ATS Analyzer\n") + f.write("# Replace with your actual API keys\n\n") + f.write("# OpenAI API Key\n") + f.write("OPENAI_API_KEY=your_openai_api_key_here\n\n") + f.write("# Groq API Key (optional, only needed if using model=2)\n") + f.write("GROQ_API_KEY=your_groq_api_key_here\n\n") + f.write("# Gemini API Key (optional, only needed if using model=3)\n") + f.write("GEMINI_API_KEY=your_gemini_api_key_here\n") + + def call_llm(self, prompt, model=1, language='en'): + try: + system_prompt = "You are an expert resume analyst and ATS specialist." + if language == 'ko': + system_prompt += " 모든 답변은 한국어로 제공하되, 지시된 용어 형식을 유지하세요." 
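+            # Model routing: 1 = OpenAI (gpt-4.1-nano), 2 = Groq (meta-llama/llama-4-maverick),
+            # 3 = Gemini (gemini-2.0-flash-lite via the OpenAI-compatible endpoint).
+            # Illustrative usage (hypothetical values):
+            #   handler = LLMHandler()
+            #   answer = handler.call_llm("Summarize this resume", model=1, language='en')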
+ + if model == 1: + return self._call_openai(prompt, system_prompt) + elif model == 2: + return self._call_groq(prompt, system_prompt) + elif model == 3: + return self._call_gemini(prompt, system_prompt) + else: + return "Error: Invalid model selection" + + except Exception as e: + print(f"Error calling LLM API: {e}") + return self._generate_dummy_response(prompt) + + def _call_openai(self, prompt, system_prompt): + openai_api_key = os.getenv("OPENAI_API_KEY") + if not openai_api_key or openai_api_key == "your_openai_api_key_here": + print("Error: OpenAI API key not found or not set in .env file") + print("Attempting to use alternative model...") + return self._generate_dummy_response(prompt) + + client = openai.OpenAI(api_key=openai_api_key) + response = client.chat.completions.create( + model="gpt-4.1-nano", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + temperature=0.1, + max_tokens=1500 + ) + + self.llm_call_count += 1 + self.total_tokens += response.usage.total_tokens + return response.choices[0].message.content.strip() + + def _call_groq(self, prompt, system_prompt): + try: + from groq import Groq + except ImportError: + print("Error: Groq package not installed. Please install it with 'pip install groq'") + print("Falling back to OpenAI API...") + return self._call_openai(prompt, system_prompt) + + groq_api_key = os.getenv("GROQ_API_KEY") + if not groq_api_key or groq_api_key == "your_groq_api_key_here": + print("Error: Groq API key not found or not set in .env file") + print("Falling back to OpenAI API...") + return self._call_openai(prompt, system_prompt) + + client = Groq(api_key=groq_api_key) + completion = client.chat.completions.create( + model="meta-llama/llama-4-maverick-17b-128e-instruct", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + temperature=0.1, + max_completion_tokens=1500, + top_p=1, + stream=False, + stop=None, + ) + + self.llm_call_count += 1 + self.total_tokens += completion.usage.total_tokens + return completion.choices[0].message.content.strip() + + def _call_gemini(self, prompt, system_prompt): + gemini_api_key = os.getenv("GEMINI_API_KEY") + if not gemini_api_key or gemini_api_key == "your_gemini_api_key_here": + print("Error: Gemini API key not found or not set in .env file") + print("Attempting to use OpenAI API instead...") + return self._call_openai(prompt, system_prompt) + + client = openai.OpenAI( + api_key=gemini_api_key, + base_url="https://generativelanguage.googleapis.com/v1beta/openai/" + ) + response = client.chat.completions.create( + model="gemini-2.0-flash-lite", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + temperature=0.1, + max_tokens=1500 + ) + + self.llm_call_count += 1 + self.total_tokens += response.usage.total_tokens + return response.choices[0].message.content.strip() + + def _generate_dummy_response(self, prompt): + print("Generating dummy response for testing purposes...") + + if "keywords" in prompt.lower(): + return "This is a dummy keywords analysis.\n\nThe resume contains some keywords that match the job description, but could be improved by adding more specific technical skills and qualifications.\n\nScore: 65 points" + elif "experience" in prompt.lower(): + return "This is a dummy experience analysis.\n\nThe candidate's experience partially matches the job requirements. 
Some areas could be strengthened to better align with the position.\n\nScore: 70 points" + elif "format" in prompt.lower(): + return "This is a dummy format analysis.\n\nThe resume has a clean format but could be improved with better section organization and more consistent formatting.\n\nScore: 75 points" + elif "content" in prompt.lower(): + return "This is a dummy content quality analysis.\n\nThe content is generally good but could use more quantifiable achievements and specific examples.\n\nScore: 68 points" + elif "errors" in prompt.lower(): + return "This is a dummy errors analysis.\n\nThe resume has few grammatical errors but some inconsistencies in formatting and punctuation.\n\nScore: 80 points" + elif "industry" in prompt.lower(): + return "This is a dummy industry analysis.\n\nThe resume shows good industry alignment but could benefit from more industry-specific terminology.\n\nScore: 72 points" + elif "competitive" in prompt.lower(): + return "This is a dummy competitive analysis.\n\nThe resume is competitive but could be strengthened in areas of technical expertise and project outcomes.\n\nScore: 70 points" + elif "improvements" in prompt.lower(): + return "This is a dummy improvement suggestions.\n\n1. Add more technical keywords from the job description\n2. Quantify achievements with specific metrics\n3. Improve formatting for better ATS readability" + elif "final assessment" in prompt.lower(): + return "This is a dummy final assessment.\n\nThe resume is generally well-aligned with the job description but has room for improvement in keyword matching and experience presentation.\n\nFinal recommendation: Make minor improvements before applying." + else: + return "This is a dummy response for testing purposes. In a real scenario, this would contain a detailed analysis based on your prompt.\n\nScore: 70 points" + + def get_statistics(self): + return { + 'llm_call_count': self.llm_call_count, + 'total_tokens': self.total_tokens + } \ No newline at end of file diff --git a/backend/ATS_agent/report_generator.py b/backend/ATS_agent/report_generator.py new file mode 100644 index 0000000..e9a8417 --- /dev/null +++ b/backend/ATS_agent/report_generator.py @@ -0,0 +1,386 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from io import BytesIO +import base64 +from datetime import datetime + +try: + from ATS_agent.config import LANGUAGE_CATEGORY_LABELS, SCORE_WEIGHTS + from ATS_agent.utils import configure_plot_fonts, restore_plot_fonts, render_markdown +except ModuleNotFoundError: + from config import LANGUAGE_CATEGORY_LABELS, SCORE_WEIGHTS + from utils import configure_plot_fonts, restore_plot_fonts, render_markdown + + +class ReportGenerator: + def __init__(self, analyzer): + self.analyzer = analyzer + + def generate_improvement_suggestions(self): + prompt = f""" + Based on the comprehensive analysis of this resume against the job description, provide specific, actionable improvements. 
+ **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + RESUME: + {self.analyzer.preprocessed_cv} + + ANALYSIS RESULTS: + Keywords Analysis: {self.analyzer.scores.get('keywords', 'N/A')}/100 + Experience Match: {self.analyzer.scores.get('experience', 'N/A')}/100 + Format & Readability: {self.analyzer.scores.get('format', 'N/A')}/100 + Content Quality: {self.analyzer.scores.get('content', 'N/A')}/100 + Errors & Consistency: {self.analyzer.scores.get('errors', 'N/A')}/100 + ATS Simulation: {self.analyzer.scores.get('ats_simulation', 'N/A')}/100 + Industry Alignment: {self.analyzer.scores.get('industry_specific', 'N/A')}/100 + + + Please provide specific, actionable improvements in these categories: + + + 1. CRITICAL ADDITIONS: Keywords and qualifications that must be added + 2. CONTENT ENHANCEMENTS: How to strengthen existing content + 3. FORMAT IMPROVEMENTS: Structural changes to improve ATS compatibility + 4. REMOVAL SUGGESTIONS: Content that should be removed or de-emphasized + 5. SECTION-BY-SECTION RECOMMENDATIONS: Specific improvements for each resume section + + + For each suggestion, provide a clear before/after example where possible. + Focus on the most impactful changes that will significantly improve ATS performance and human readability. + """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + self.analyzer.improvement_suggestions = response + return response + + def generate_optimized_resume(self): + prompt = f""" + Create an optimized version of this resume specifically tailored for the job description. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + JOB DESCRIPTION: + {self.analyzer.jd_text} + + CURRENT RESUME: + {self.analyzer.preprocessed_cv} + + Please rewrite the resume to: + 1. Incorporate all relevant keywords from the job description + 2. Highlight the most relevant experience and qualifications + 3. Use ATS-friendly formatting and structure + 4. Quantify achievements where possible + 5. Remove or downplay irrelevant information + + + The optimized resume should maintain truthfulness while presenting the candidate in the best possible light for this specific position. + Use standard resume formatting with clear section headers. 
+ """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + self.analyzer.optimized_resume = response + return response + + def generate_final_score_and_recommendations(self): + weighted_sum = 0 + used_weights_sum = 0 + category_scores = {} + + for category, weight in SCORE_WEIGHTS.items(): + if category in self.analyzer.scores: + score = self.analyzer.scores[category] + try: + score_value = float(score) + except (TypeError, ValueError): + continue + + weighted_sum += score_value * weight + used_weights_sum += weight + category_scores[category] = score_value + + if used_weights_sum > 0: + final_score = weighted_sum / used_weights_sum + else: + numeric_scores = [] + for key, value in self.analyzer.scores.items(): + if key == 'final': + continue + try: + numeric_scores.append(float(value)) + except (TypeError, ValueError): + continue + final_score = sum(numeric_scores) / len(numeric_scores) if numeric_scores else 0 + + self.analyzer.scores['final'] = final_score + + jd_summary = "" + if self.analyzer.jd_analysis: + jd_summary = "JOB DESCRIPTION ANALYSIS:\n" + if self.analyzer.jd_analysis.get('required_qualifications'): + jd_summary += "Required Qualifications: " + ", ".join(self.analyzer.jd_analysis.get('required_qualifications')[:5]) + "\n" + if self.analyzer.jd_analysis.get('technical_skills'): + jd_summary += "Technical Skills: " + ", ".join(self.analyzer.jd_analysis.get('technical_skills')[:5]) + "\n" + if self.analyzer.jd_analysis.get('key_responsibilities'): + jd_summary += "Key Responsibilities: " + ", ".join(self.analyzer.jd_analysis.get('key_responsibilities')[:3]) + "\n" + + prompt = f""" + Based on the comprehensive analysis of this resume against the job description, provide a final assessment and recommendations. + **IMPORTANT: OUTPUT LANGUAGE MUST FOLLOW CV and JD LANGUAGE** + + {jd_summary} + + RESUME ANALYSIS SCORES: + ATS Simulation Score: {category_scores.get('ats_simulation', 'N/A')}/100 (30% of final score) + Keywords Match: {category_scores.get('keywords', 'N/A')}/100 (25% of final score) + Experience Match: {category_scores.get('experience', 'N/A')}/100 (20% of final score) + Industry Alignment: {category_scores.get('industry_specific', 'N/A')}/100 (15% of final score) + Content Quality: {category_scores.get('content', 'N/A')}/100 (5% of final score) + Format & Readability: {category_scores.get('format', 'N/A')}/100 (3% of final score) + Errors & Consistency: {category_scores.get('errors', 'N/A')}/100 (2% of final score) + + FINAL WEIGHTED SCORE: {final_score:.1f}/100 + + Please provide a detailed final assessment with: + + 1. EXECUTIVE SUMMARY: A concise summary of how well this resume matches this specific job description + + 2. STRENGTHS: The top 3 strengths of this resume for this specific job + + 3. CRITICAL IMPROVEMENTS: The top 3 most critical improvements needed to better match this job description + + 4. ATS ASSESSMENT: An assessment of the resume's likelihood of passing ATS filters for this specific job + + 5. INTERVIEW POTENTIAL: An assessment of whether this resume would likely lead to an interview + + 6. FINAL RECOMMENDATION: A clear verdict on whether the candidate should: + a) Apply with this resume as is + b) Make minor improvements before applying + c) Make major improvements before applying + + Be specific about which improvements would have the biggest impact on ATS performance for this particular job. 
+ """ + + response = self.analyzer.call_llm(prompt, model=self.analyzer.model) + self.analyzer.final_report = response + + def generate_visual_report(self, output_path="ats_report.html"): + try: + categories = LANGUAGE_CATEGORY_LABELS.get( + self.analyzer.language, + LANGUAGE_CATEGORY_LABELS['en'] + ).copy() + + values = [ + self.analyzer._score_value('keywords'), + self.analyzer._score_value('experience'), + self.analyzer._score_value('industry_specific'), + self.analyzer._score_value('content'), + self.analyzer._score_value('format') + ] + + fig = plt.figure(figsize=(10, 6)) + ax = fig.add_subplot(111, polar=True) + + angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist() + + values.append(values[0]) + angles.append(angles[0]) + categories.append(categories[0]) + + font_settings = configure_plot_fonts(self.analyzer.language) + _, font_prop = font_settings if font_settings else (None, None) + + ax.plot(angles, values, 'o-', linewidth=2) + ax.fill(angles, values, alpha=0.25) + ax.set_thetagrids(np.degrees(angles[:-1]), categories[:-1]) + ax.set_ylim(0, 100) + title_text = self.analyzer._html_label('title', 'Resume ATS Analysis Report') + if font_prop: + ax.set_title(title_text, fontproperties=font_prop, size=15) + else: + ax.set_title(title_text, size=15) + + if font_prop: + for label in ax.get_xticklabels() + ax.get_yticklabels(): + label.set_fontproperties(font_prop) + + buffer = BytesIO() + plt.savefig(buffer, format='png', bbox_inches='tight') + buffer.seek(0) + img_str = base64.b64encode(buffer.read()).decode() + plt.close() + restore_plot_fonts(font_settings) + + html_content = self._generate_html_content(img_str) + + with open(output_path, 'w', encoding='utf-8') as f: + f.write(html_content) + + return output_path + + except Exception as e: + print(f"Error generating visual report: {e}") + return None + + def _generate_html_content(self, img_str): + html_title = self.analyzer._html_label('title', 'Resume ATS Analysis Report') + analysis_date_label = self.analyzer._html_label('analysis_date', 'Analysis Date') + score_breakdown_label = self.analyzer._html_label('score_breakdown', 'Score Breakdown') + executive_summary_label = self.analyzer._html_label('executive_summary', 'Executive Summary') + ats_results_label = self.analyzer._html_label('ats_results', 'ATS Simulation Results') + improvement_label = self.analyzer._html_label('improvement', 'Recommended Improvements') + detailed_label = self.analyzer._html_label('detailed_analysis', 'Detailed Analysis') + keywords_label = self.analyzer._html_label('keywords_match', 'Keywords Match') + experience_label = self.analyzer._html_label('experience_match', 'Experience & Qualifications') + industry_label = self.analyzer._html_label('industry_alignment', 'Industry Alignment') + content_label = self.analyzer._html_label('content_quality', 'Content Quality') + format_label = self.analyzer._html_label('format_quality', 'Format & Readability') + error_label = self.analyzer._html_label('error_check', 'Errors & Consistency') + chart_alt = self.analyzer._localized_context("ATS Analysis Chart", "ATS 분석 차트") + not_available = self.analyzer._localized_context("Not available", "제공되지 않음") + + score_values = { + 'final': self.analyzer._score_value('final'), + 'keywords': self.analyzer._score_value('keywords'), + 'experience': self.analyzer._score_value('experience'), + 'format': self.analyzer._score_value('format'), + 'content': self.analyzer._score_value('content'), + 'errors': self.analyzer._score_value('errors'), + 
'industry_specific': self.analyzer._score_value('industry_specific'), + 'ats_simulation': self.analyzer._score_value('ats_simulation'), + } + + def progress_class(value): + return 'good' if value >= 80 else 'medium' if value >= 60 else 'poor' + + html_content = f""" + + + + {html_title} + + + +
+        <div class="container">
+            <div class="header">
+                <h1>{html_title}</h1>
+                <p>{analysis_date_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
+            </div>
+
+            <div class="section">
+                <h2>{score_breakdown_label}</h2>
+                <img src="data:image/png;base64,{img_str}" alt="{chart_alt}">
+            </div>
+
+            <div class="section">
+                <h2>{executive_summary_label}</h2>
+                <div>{render_markdown(self.analyzer.final_report)}</div>
+            </div>
+
+            <div class="section">
+                <h2>{improvement_label}</h2>
+                <div>{render_markdown(self.analyzer.improvement_suggestions)}</div>
+            </div>
+
+            <div class="section">
+                <h2>{detailed_label}</h2>
+
+                <div>{render_markdown(self.analyzer.analysis_results.get('ats_simulation', not_available))}</div>
+
+                <h3>{keywords_label} ({score_values['keywords']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['keywords'])}" style="width: {score_values['keywords']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('keywords', not_available))}</div>
+
+                <h3>{experience_label} ({score_values['experience']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['experience'])}" style="width: {score_values['experience']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('experience', not_available))}</div>
+
+                <h3>{format_label} ({score_values['format']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['format'])}" style="width: {score_values['format']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('format', not_available))}</div>
+
+                <h3>{content_label} ({score_values['content']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['content'])}" style="width: {score_values['content']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('content', not_available))}</div>
+
+                <h3>{industry_label} ({score_values['industry_specific']:.0f}/100)</h3>
+                <div class="progress-bar">
+                    <div class="progress {progress_class(score_values['industry_specific'])}" style="width: {score_values['industry_specific']}%"></div>
+                </div>
+                <div>{render_markdown(self.analyzer.analysis_results.get('industry_specific', not_available))}</div>
+            </div>
+
+            <div class="section">
+                <h2>{self.analyzer._localized_context('Competitive Analysis', '경쟁력 분석')}</h2>
+                <div>{render_markdown(self.analyzer.analysis_results.get('competitive', not_available))}</div>
+            </div>
+        </div>
+
+ + + """ + + return html_content + + def generate_text_report(self): + report = "=== ATS ANALYSIS REPORT ===\n\n" + + report += "SCORE BREAKDOWN:\n" + report += f"- Keywords Match: {self.analyzer._score_value('keywords'):.0f}/100\n" + report += f"- Experience Match: {self.analyzer._score_value('experience'):.0f}/100\n" + report += f"- Format & Readability: {self.analyzer._score_value('format'):.0f}/100\n" + report += f"- Content Quality: {self.analyzer._score_value('content'):.0f}/100\n" + report += f"- Industry Alignment: {self.analyzer._score_value('industry_specific'):.0f}/100\n\n" + + report += "EXECUTIVE SUMMARY:\n" + report += f"{self.analyzer.final_report}\n\n" + + report += "RECOMMENDED IMPROVEMENTS:\n" + report += f"{self.analyzer.improvement_suggestions}\n\n" + + report += "USAGE STATISTICS:\n" + report += f"- LLM API Calls: {self.analyzer.llm_call_count}\n" + report += f"- Total Tokens Used: {self.analyzer.total_tokens}\n" + report += f"- Analysis Time: {self.analyzer.total_time:.2f} seconds\n" + + return report \ No newline at end of file diff --git a/backend/ATS_agent/upstage_parser.py b/backend/ATS_agent/upstage_parser.py new file mode 100644 index 0000000..4e30bfd --- /dev/null +++ b/backend/ATS_agent/upstage_parser.py @@ -0,0 +1,52 @@ +import requests +import os +from dotenv import load_dotenv + +def upstage_parser(file_path): + load_dotenv('.env') + api_key = os.getenv("UPSTAGE_API_KEY") + filename = file_path + + url = "https://api.upstage.ai/v1/document-digitization" + headers = {"Authorization": f"Bearer {api_key}"} + + with open(filename, "rb") as f: + files = {"document": f} + data = {"ocr": "force", "base64_encoding": "['table']", "model": "document-parse", "output_formats": "['markdown']"} + response = requests.post(url, headers=headers, files=files, data=data) + + if response.status_code != 200: + print(f"API error: {response.status_code} - {response.text}") + return None, None, None + + try: + response_json = response.json() + + coordinates = [] + contents = [] + + if 'elements' in response_json: + for i in response_json['elements']: + if 'coordinates' in i: + coordinates.append(i['coordinates']) + if 'content' in i and 'markdown' in i['content']: + contents.append(i['content']['markdown']) + + full_contents = "" + if 'content' in response_json and 'markdown' in response_json['content']: + full_contents = response_json['content']['markdown'] + + return contents, coordinates, full_contents + + except (KeyError, ValueError, TypeError) as e: + print(f"Error parsing response: {e}") + return None, None, None + +if __name__ == "__main__": + file_path = "sample_cv.jpg" + contents, coordinates, full_contents = upstage_parser(file_path) + print(contents) + print(len(contents)) + print(coordinates) + print(len(coordinates)) + print(full_contents) \ No newline at end of file diff --git a/backend/ATS_agent/utils.py b/backend/ATS_agent/utils.py new file mode 100644 index 0000000..bd11ecf --- /dev/null +++ b/backend/ATS_agent/utils.py @@ -0,0 +1,311 @@ +import os +import re +import html +import unicodedata +import matplotlib.pyplot as plt +from matplotlib import font_manager + + +def normalize_text(text): + if not text: + return "" + normalized = unicodedata.normalize('NFC', str(text)) + normalized = normalized.replace('\r\n', '\n').replace('\r', '\n') + return normalized + + +def detect_language(text): + if not text: + return 'en' + + normalized = normalize_text(text) + hangul_count = sum(1 for ch in normalized if '\uac00' <= ch <= '\ud7a3') + latin_count = sum(1 for ch in 
normalized if ch.isascii() and ch.isalpha()) + + if hangul_count == 0 and latin_count == 0: + return 'en' + + if hangul_count >= 50 and hangul_count >= latin_count: + return 'ko' + if hangul_count >= latin_count * 2 and hangul_count >= 20: + return 'ko' + if hangul_count > 0 and latin_count == 0: + return 'ko' + return 'en' + + +def configure_plot_fonts(language): + if language != 'ko': + return (None, None) + + preferred_fonts = [ + 'Malgun Gothic', + 'MalgunGothic', + 'AppleGothic', + 'NanumGothic', + 'NanumBarunGothic', + 'Noto Sans CJK KR', + 'Noto Sans KR' + ] + + candidate_paths = [ + r'C:\\Windows\\Fonts\\malgun.ttf', + r'C:\\Windows\\Fonts\\malgunbd.ttf', + '/System/Library/Fonts/AppleSDGothicNeo.ttc', + '/System/Library/Fonts/AppleGothic.ttf', + '/Library/Fonts/AppleSDGothicNeo.ttf', + '/usr/share/fonts/truetype/nanum/NanumGothic.ttf', + '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc', + '/usr/share/fonts/truetype/noto/NotoSansKR-Regular.otf' + ] + + previous_family = plt.rcParams.get('font.family') + font_prop = None + + available_fonts = {font.name for font in font_manager.fontManager.ttflist} + for font_name in preferred_fonts: + if font_name in available_fonts: + plt.rcParams['font.family'] = [font_name] + plt.rcParams['axes.unicode_minus'] = False + font_prop = font_manager.FontProperties(family=font_name) + return (previous_family, font_prop) + + for path in candidate_paths: + if os.path.exists(path): + try: + font_manager.fontManager.addfont(path) + font_prop = font_manager.FontProperties(fname=path) + plt.rcParams['font.family'] = [font_prop.get_name()] + plt.rcParams['axes.unicode_minus'] = False + return (previous_family, font_prop) + except Exception: + continue + + plt.rcParams['axes.unicode_minus'] = False + return (previous_family, None) + + +def restore_plot_fonts(previous_settings): + previous_family, _ = previous_settings if previous_settings else (None, None) + if previous_family is not None: + plt.rcParams['font.family'] = previous_family + + +def render_markdown(text): + if not text: + return '' + + try: + import markdown + extensions = ['extra', 'sane_lists', 'codehilite', 'nl2br'] + return markdown.markdown(text, extensions=extensions) + except Exception: + pass + + try: + import markdown2 + extras = ['fenced-code-blocks', 'tables', 'strike', 'code-friendly', 'cuddled-lists'] + return markdown2.markdown(text, extras=extras) + except Exception: + pass + + return basic_markdown_to_html(text) + + +def basic_markdown_to_html(text): + lines = text.splitlines() + html_lines = [] + in_list = False + in_code = False + code_language = '' + table_buffer = [] + + def close_list(): + nonlocal in_list + if in_list: + html_lines.append('') + in_list = False + + def close_code(): + nonlocal in_code + if in_code: + html_lines.append('') + in_code = False + + def flush_table(): + nonlocal table_buffer + if not table_buffer: + return + rows = [row.strip() for row in table_buffer if row.strip()] + table_buffer = [] + if not rows: + return + + header = rows[0] + separator = rows[1] if len(rows) > 1 else '' + data_rows = rows[2:] if re.match(r'^\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$', separator) else rows[1:] + + def split_row(row): + return [cell.strip() for cell in row.strip('|').split('|')] + + html_lines.append('') + html_lines.append('') + for cell in split_row(header): + html_lines.append(f'') + html_lines.append('') + if data_rows: + html_lines.append('') + for data_row in data_rows: + if set(data_row) <= {'|', '-', ':', ' '}: + continue + html_lines.append('') + 
+                for cell in split_row(data_row):
+                    html_lines.append(f'<td>{inline_markdown(html.escape(cell))}</td>')
') + + for raw_line in lines: + line = raw_line.rstrip('\n') + + if in_code: + if line.strip().startswith('```'): + close_code() + else: + html_lines.append(html.escape(raw_line)) + continue + + stripped = line.strip() + + if stripped.startswith('```'): + close_list() + flush_table() + in_code = True + code_language = stripped[3:].strip() + class_attr = f' class="language-{html.escape(code_language)}"' if code_language else '' + html_lines.append(f'
')
+            continue
+
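+        # Remaining branches handle: horizontal rules, table rows, blank lines,
+        # headings (#, ##, ###), bullet list items, and plain paragraph lines.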
+        if stripped in {'---', '***', '___'}:
+            close_list()
+            flush_table()
+            html_lines.append('<hr>')
') + continue + + if looks_like_table_row(stripped): + table_buffer.append(stripped) + continue + else: + flush_table() + + if not stripped: + close_list() + html_lines.append('
') + continue + + if stripped.startswith('### '): + close_list() + html_lines.append(f"

{html.escape(stripped[4:])}

") + continue + if stripped.startswith('## '): + close_list() + html_lines.append(f"

{html.escape(stripped[3:])}

") + continue + if stripped.startswith('# '): + close_list() + html_lines.append(f"

{html.escape(stripped[2:])}

") + continue + + if stripped.startswith(('- ', '* ')): + if not in_list: + html_lines.append('
    ') + in_list = True + content = stripped[2:] + html_lines.append(f"
  • {inline_markdown(html.escape(content))}
  • ") + continue + + close_list() + html_lines.append(f"

    {inline_markdown(html.escape(line))}

    ") + + close_code() + close_list() + flush_table() + return '\n'.join(html_lines) + + +def looks_like_table_row(line): + if '|' not in line: + return False + parts = line.strip('|').split('|') + return len(parts) > 1 + + +def inline_markdown(text): + """Handle simple inline markdown such as bold and italics.""" + text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) + text = re.sub(r'\*(.+?)\*', r'\1', text) + text = re.sub(r'`([^`]+)`', r'\1', text) + return text + + +def extract_score(response_text): + patterns = [ + r'Score:\s*(\d+(?:\.\d+)?)\s*points', + r'Score:\s*(\d+(?:\.\d+)?)', + r'score of\s*(\d+(?:\.\d+)?)', + r'rated at\s*(\d+(?:\.\d+)?)', + r'(\d+(?:\.\d+)?)/100', + r'(\d+(?:\.\d+)?)\s*out of\s*100', + r'점수[:\s]*(\d+(?:\.\d+)?)', + r'(\d+(?:\.\d+)?)\s*점' + ] + + normalized_text = normalize_text(response_text) + + for pattern in patterns: + match = re.search(pattern, normalized_text, re.IGNORECASE) + if match: + try: + score_value = float(match.group(1)) + return max(0, min(100, int(round(score_value)))) + except ValueError: + continue + + return 50 + + +def advanced_preprocessing(text): + text = normalize_text(text) + text = re.sub(r'[ \t]+', ' ', text) + text = re.sub(r'\n{3,}', '\n\n', text) + + return text.strip() + + +def extract_resume_sections(text, section_patterns): + compiled_patterns = { + name: re.compile(pattern, re.IGNORECASE) + for name, pattern in section_patterns.items() + } + + sections = {} + current_section = 'header' + sections[current_section] = [] + + lines = text.split('\n') + for line in lines: + matched = False + for section_name, pattern in compiled_patterns.items(): + if pattern.search(line): + current_section = section_name + sections[current_section] = [] + matched = True + break + + if not matched: + sections[current_section].append(line) + + for section in sections: + sections[section] = '\n'.join(sections[section]).strip() + + return sections \ No newline at end of file diff --git a/backend/api_test.py b/backend/api_test.py deleted file mode 100644 index e16e7fc..0000000 --- a/backend/api_test.py +++ /dev/null @@ -1,18 +0,0 @@ -import requests - - -# POST 요청 함수 -def send_post_request(resume_path): - url = "http://localhost:8000/matching" # 실제 API 엔드포인트로 변경하세요. - data = {"resume_path": resume_path} - - try: - response = requests.post(url, json=data) - response.raise_for_status() # 상태 코드가 200번대가 아니면 예외 발생 - print("POST 요청 성공:", response.json()) - except requests.exceptions.RequestException as e: - print("POST 요청 중 오류 발생:", e) - - -# 함수 호출 예시 -send_post_request("data/joannadrummond-cv.pdf") diff --git a/backend/ats_analyzer_improved.py b/backend/ats_analyzer_improved.py deleted file mode 100644 index 11c3bb9..0000000 --- a/backend/ats_analyzer_improved.py +++ /dev/null @@ -1,1393 +0,0 @@ -# %% -import os -import re -import json -import time -import numpy as np -import openai -import PyPDF2 -import docx -import matplotlib.pyplot as plt -from io import BytesIO -import base64 -from datetime import datetime -from dotenv import load_dotenv -from configs import OPENAI_API_KEY, GROQ_API_KEY - -class ATSAnalyzer: - """ - Advanced ATS (Applicant Tracking System) Analyzer - - This class analyzes resumes against job descriptions to simulate - how an ATS system would evaluate the resume, providing detailed feedback - and improvement suggestions. 
- """ - - def __init__(self, cv_path, jd_text, model=1): - """ - Initialize the ATS Analyzer - - Args: - cv_path (str): Path to the resume file (PDF, DOCX, TXT) or raw text - jd_text (str): Job description text - model (int): Model selection (1=OpenAI, 2=Groq) - """ - self.cv_path = cv_path - self.jd_text = jd_text - self.cv_text = "" - self.preprocessed_cv = "" - self.structured_cv = {} - self.jd_analysis = {} # Added: Store JD analysis results - self.jd_requirements = [] # Added: Store extracted JD requirements - self.jd_keywords = [] # Added: Store extracted JD keywords - self.analysis_results = {} - self.scores = {} - self.final_report = "" - self.improvement_suggestions = "" - self.competitive_analysis = "" - self.optimized_resume = "" - self.llm_call_count = 0 - self.total_tokens = 0 - self.total_time = 0 - self.model = model - - # Load environment variables - load_dotenv() - - def extract_and_preprocess(self): - """Extract text from resume file and preprocess it""" - ext = os.path.splitext(self.cv_path)[1].lower() - text = "" - try: - if len(self.cv_path) > 270: # Likely raw text rather than a file path - text = self.cv_path - elif ext == ".pdf": - with open(self.cv_path, 'rb') as f: - reader = PyPDF2.PdfReader(f) - for page in reader.pages: - text += page.extract_text() + "\n" - elif ext == ".docx": - doc = docx.Document(self.cv_path) - for para in doc.paragraphs: - text += para.text + "\n" - elif ext in [".txt", ".md"]: - with open(self.cv_path, 'r', encoding='utf-8') as f: - text = f.read() - else: - print(f"Unsupported file format: {ext}") - text = "" - except Exception as e: - print(f"Error processing resume file: {e}") - text = "" - - self.cv_text = text.strip() - - # Extract structured sections from the resume - self.structured_cv = self.extract_resume_sections(self.cv_text) - - # Advanced preprocessing - self.preprocessed_cv = self.advanced_preprocessing(self.cv_text) - - # Analyze the job description - self.analyze_job_description() - - print(f"Extracted {len(self.cv_text)} characters from resume") - print(f"Identified {len(self.structured_cv)} sections in the resume") - print(f"Analyzed job description with {len(self.jd_keywords)} keywords extracted") - - def analyze_job_description(self): - """ - Analyze the job description to extract requirements, keywords, and other important information - This is a critical step to ensure the ATS analysis is specific to this particular job - """ - # Extract key requirements and keywords from the JD - jd_analysis_prompt = f""" - Perform a detailed analysis of this job description to extract all information that would be used by an ATS system. - - JOB DESCRIPTION: - {self.jd_text} - - Please provide a comprehensive analysis with the following components: - - 1. REQUIRED QUALIFICATIONS: All explicitly stated required qualifications (education, experience, certifications, etc.) - 2. PREFERRED QUALIFICATIONS: All preferred or desired qualifications that are not strictly required - 3. KEY RESPONSIBILITIES: The main job duties and responsibilities - 4. TECHNICAL SKILLS: All technical skills, tools, languages, frameworks, etc. mentioned - 5. SOFT SKILLS: All soft skills, personal qualities, and character traits mentioned - 6. INDUSTRY KNOWLEDGE: Required industry-specific knowledge or experience - 7. COMPANY VALUES: Any company values or culture fit indicators mentioned - - Format your response as a valid JSON object with these categories as keys, and arrays of strings as values. 
- Also include a "keywords" array with all important keywords from the job description, each with an importance score from 1-10. - - The JSON must be properly formatted with no errors. Make sure all quotes are properly escaped and all arrays and objects are properly closed. - - Example format: - {{"required_qualifications": ["Bachelor's degree in Computer Science", "5+ years of experience"], - "preferred_qualifications": ["Master's degree", "Experience with cloud platforms"], - "key_responsibilities": ["Develop software applications", "Debug and troubleshoot issues"], - "technical_skills": ["Python", "JavaScript", "AWS"], - "soft_skills": ["Communication", "Teamwork"], - "industry_knowledge": ["Financial services", "Regulatory compliance"], - "company_values": ["Innovation", "Customer focus"], - "keywords": [{{"keyword": "Python", "importance": 9, "category": "Technical Skill"}}, {{"keyword": "Bachelor's degree", "importance": 8, "category": "Education"}}] - }} - - Return ONLY the JSON object with no additional text before or after. - """ - - response = self.call_llm(jd_analysis_prompt, model=self.model) - - # Parse the JSON response - try: - # Try to clean up the response to make it valid JSON - # Remove any text before the first '{' and after the last '}' - start_idx = response.find('{') - end_idx = response.rfind('}') - - if start_idx >= 0 and end_idx >= 0: - response = response[start_idx:end_idx+1] - - # Try to parse the JSON - try: - self.jd_analysis = json.loads(response) - except json.JSONDecodeError as e: - # If parsing fails, try to fix common JSON errors - print(f"Initial JSON parsing failed: {e}") - print("Attempting to fix JSON format...") - - # Fix common JSON errors - # 1. Replace single quotes with double quotes - response = response.replace("'", '"') - - # 2. 
Fix trailing commas in arrays and objects - response = re.sub(r',\s*}', '}', response) - response = re.sub(r',\s*]', ']', response) - - # Try parsing again - self.jd_analysis = json.loads(response) - - # Extract keywords for later use - self.jd_keywords = self.jd_analysis.get('keywords', []) - - # Compile a list of all requirements - self.jd_requirements = ( - self.jd_analysis.get('required_qualifications', []) + - self.jd_analysis.get('preferred_qualifications', []) + - self.jd_analysis.get('technical_skills', []) + - self.jd_analysis.get('soft_skills', []) + - self.jd_analysis.get('industry_knowledge', []) - ) - - print(f"Successfully parsed JD analysis with {len(self.jd_keywords)} keywords") - - except Exception as e: - print(f"Error parsing JD analysis JSON: {e}") - print(f"Raw response: {response[:500]}...") - - # If all parsing attempts fail, create a default structure with dummy data - print("Creating default JD analysis structure with dummy data") - self.jd_analysis = { - "required_qualifications": ["Master's degree", "1+ years of experience"], - "preferred_qualifications": ["PhD", "Industry experience"], - "key_responsibilities": ["Research", "Development", "Collaboration"], - "technical_skills": ["Python", "Machine Learning", "Deep Learning"], - "soft_skills": ["Communication", "Teamwork"], - "industry_knowledge": ["AI Research", "Software Development"], - "company_values": ["Innovation", "Collaboration"], - "keywords": [ - {"keyword": "Python", "importance": 9, "category": "Technical Skill"}, - {"keyword": "Machine Learning", "importance": 8, "category": "Technical Skill"}, - {"keyword": "Research", "importance": 7, "category": "Experience"}, - {"keyword": "Master's degree", "importance": 8, "category": "Education"} - ] - } - self.jd_keywords = self.jd_analysis["keywords"] - self.jd_requirements = ( - self.jd_analysis["required_qualifications"] + - self.jd_analysis["preferred_qualifications"] + - self.jd_analysis["technical_skills"] + - self.jd_analysis["soft_skills"] + - self.jd_analysis["industry_knowledge"] - ) - - def extract_resume_sections(self, text): - """ - Extract and structure resume sections - - Args: - text (str): Raw resume text - - Returns: - dict: Resume sections as a structured dictionary - """ - # Common resume section header patterns - section_patterns = { - 'personal_info': r'(Personal\s*Information|Contact|Profile)', - 'summary': r'(Summary|Professional\s*Summary|Profile|Objective)', - 'education': r'(Education|Academic|Qualifications|Degrees)', - 'experience': r'(Experience|Work\s*Experience|Employment|Career\s*History)', - 'skills': r'(Skills|Technical\s*Skills|Competencies|Expertise)', - 'projects': r'(Projects|Key\s*Projects|Professional\s*Projects)', - 'certifications': r'(Certifications|Certificates|Accreditations)', - 'languages': r'(Languages|Language\s*Proficiency)', - 'publications': r'(Publications|Research|Papers)', - 'awards': r'(Awards|Honors|Achievements|Recognitions)' - } - - sections = {} - current_section = 'header' # Text before first section is considered header - sections[current_section] = [] - - lines = text.split('\n') - for line in lines: - matched = False - for section_name, pattern in section_patterns.items(): - if re.search(pattern, line, re.IGNORECASE): - current_section = section_name - sections[current_section] = [] - matched = True - break - - if not matched: - sections[current_section].append(line) - - # Combine lines in each section into text - for section in sections: - sections[section] = 
'\n'.join(sections[section]).strip() - - return sections - - def advanced_preprocessing(self, text): - """ - Advanced text preprocessing for resume analysis - - Args: - text (str): Raw resume text - - Returns: - str: Preprocessed text - """ - # Preserve important formatting like emails, URLs, phone numbers - # Replace excessive whitespace - text = re.sub(r'\s+', ' ', text) - - # Clean up unnecessary line breaks while preserving paragraph structure - text = re.sub(r'\n{3,}', '\n\n', text) - - return text.strip() - - def analyze_keywords(self): - """ - Analyze how well the resume matches key terms in the job description - Uses the pre-analyzed JD to ensure accuracy - """ - # Prepare JD analysis for the prompt - jd_analysis_str = "\n".join([ - "REQUIRED QUALIFICATIONS:\n- " + "\n- ".join(self.jd_analysis.get('required_qualifications', [])), - "PREFERRED QUALIFICATIONS:\n- " + "\n- ".join(self.jd_analysis.get('preferred_qualifications', [])), - "TECHNICAL SKILLS:\n- " + "\n- ".join(self.jd_analysis.get('technical_skills', [])), - "SOFT SKILLS:\n- " + "\n- ".join(self.jd_analysis.get('soft_skills', [])), - "INDUSTRY KNOWLEDGE:\n- " + "\n- ".join(self.jd_analysis.get('industry_knowledge', [])) - ]) - - # Extract top keywords by importance - top_keywords = sorted(self.jd_keywords, key=lambda x: x.get('importance', 0), reverse=True)[:20] - keywords_str = "\n".join([f"- {kw.get('keyword')} (Importance: {kw.get('importance')}/10, Category: {kw.get('category')})" - for kw in top_keywords]) - - prompt = f""" - Analyze how well this resume matches the key requirements and keywords from the job description. - - JOB DESCRIPTION ANALYSIS: - {jd_analysis_str} - - TOP KEYWORDS FROM JOB DESCRIPTION: - {keywords_str} - - RESUME: - {self.preprocessed_cv} - - Please provide a detailed analysis with the following: - - 1. TECHNICAL SKILLS MATCH: Evaluate how well the resume matches the required technical skills - 2. QUALIFICATIONS MATCH: Evaluate how well the resume matches required and preferred qualifications - 3. SOFT SKILLS MATCH: Evaluate how well the resume demonstrates the required soft skills - 4. EXPERIENCE MATCH: Evaluate how well the resume satisfies experience requirements - 5. KEYWORD ANALYSIS: Create a table showing matched and missing keywords, with their importance - - For each category, provide specific examples from both the job description and resume. - Calculate a match percentage for each category, and provide an overall keyword match score. - - End your analysis with "Score: XX points" where XX is a score from 0-100 representing how well the resume matches the job description's keywords and requirements. - """ - - response = self.call_llm(prompt, model=self.model) - print("[DEBUG] Keywords analysis LLM response:\n", response[:300], "...") - - score = self.extract_score(response) - print("[DEBUG] Keywords score:", score) - - self.analysis_results['keywords'] = response - self.scores['keywords'] = score - - def analyze_experience_and_qualifications(self): - """ - Analyze how well the resume's experience and qualifications match the job requirements - """ - prompt = f""" - Evaluate how well the candidate's experience and qualifications match the job requirements: - - JOB DESCRIPTION: - {self.jd_text} - - RESUME: - {self.preprocessed_cv} - - Please provide a detailed analysis of: - 1. Required years of experience vs. candidate's experience - 2. Required education level vs. candidate's education - 3. Required industry experience vs. candidate's industry background - 4. 
Required responsibilities vs. candidate's demonstrated capabilities - 5. Required achievements vs. candidate's accomplishments - - - For each area, indicate whether the candidate exceeds, meets, or falls short of requirements. - Provide specific examples from both the job description and resume. - - - End your analysis with "Score: XX points" where XX is a score from 0-100 representing how well the candidate's experience and qualifications match the job requirements. - """ - - response = self.call_llm(prompt, model=self.model) - print("[DEBUG] Experience analysis LLM response:\n", response[:300], "...") - - score = self.extract_score(response) - print("[DEBUG] Experience score:", score) - - self.analysis_results['experience'] = response - self.scores['experience'] = score - - def analyze_format_and_readability(self): - """ - Analyze the resume's format, structure, and readability - """ - prompt = f""" - Evaluate the format, structure, and readability of the following resume: - - RESUME: - {self.preprocessed_cv} - - Please analyze: - 1. Overall organization and structure - 2. Readability and clarity - 3. Use of bullet points, sections, and white space - 4. Consistency in formatting (dates, job titles, etc.) - 5. Grammar, spelling, and punctuation - 6. ATS-friendliness of the format - - - Provide specific examples of strengths and weaknesses in the format. - Suggest specific improvements to make the resume more ATS-friendly and readable. - - - End your analysis with "Score: XX points" where XX is a score from 0-100 representing the quality of the resume's format and readability. - """ - - response = self.call_llm(prompt, model=self.model) - print("[DEBUG] Format analysis LLM response:\n", response[:300], "...") - - score = self.extract_score(response) - print("[DEBUG] Format score:", score) - - self.analysis_results['format'] = response - self.scores['format'] = score - - def analyze_content_quality(self): - """ - Analyze the quality of content in the resume - """ - prompt = f""" - Evaluate the quality of content in the following resume: - - RESUME: - {self.preprocessed_cv} - - Please analyze: - 1. Use of strong action verbs and achievement-oriented language - 2. Quantification of achievements (metrics, percentages, numbers) - 3. Specificity vs. vagueness in descriptions - 4. Relevance of included information - 5. Balance between technical details and high-level accomplishments - 6. Presence of clichés or generic statements vs. unique value propositions - - - Provide specific examples from the resume for each point. - Suggest specific improvements to strengthen the content quality. - - - End your analysis with "Score: XX points" where XX is a score from 0-100 representing the quality of the resume's content. - """ - - response = self.call_llm(prompt, model=self.model) - print("[DEBUG] Content analysis LLM response:\n", response[:300], "...") - - score = self.extract_score(response) - print("[DEBUG] Content score:", score) - - self.analysis_results['content'] = response - self.scores['content'] = score - - def check_errors_and_consistency(self): - """ - Check for errors, inconsistencies, and red flags in the resume - """ - prompt = f""" - Analyze the following resume for errors, inconsistencies, and potential red flags: - - RESUME: - {self.preprocessed_cv} - - Please identify and explain: - 1. Spelling and grammar errors - 2. Inconsistencies in dates, job titles, or other information - 3. Unexplained employment gaps - 4. Formatting inconsistencies - 5. 
Potential red flags that might concern employers - - - For each issue found, provide the specific text from the resume and suggest a correction. - If no issues are found in a category, explicitly state that. - - - End your analysis with "Score: XX points" where XX is a score from 0-100 representing how error-free and consistent the resume is (100 = perfect, no issues). - """ - - response = self.call_llm(prompt, model=self.model) - print("[DEBUG] Errors analysis LLM response:\n", response[:300], "...") - - score = self.extract_score(response) - print("[DEBUG] Errors score:", score) - - self.analysis_results['errors'] = response - self.scores['errors'] = score - - def simulate_ats_filtering(self): - """ - Simulate how an actual ATS system would evaluate this resume - Uses the pre-analyzed JD keywords for more accurate simulation - """ - # Use the keywords already extracted from JD analysis - if not self.jd_keywords: - print("No keywords available from JD analysis, running JD analysis first") - self.analyze_job_description() - - # Use the keywords from JD analysis - keywords = self.jd_keywords - - if not keywords: - print("Warning: No keywords found in JD analysis") - self.analysis_results['ats_simulation'] = "Error in ATS simulation: No keywords found in job description" - self.scores['ats_simulation'] = 50 # Default middle score - return - - # Calculate keyword matching score with more sophisticated matching - total_importance = sum(kw.get('importance', 5) for kw in keywords) - matched_importance = 0 - matched_keywords = [] - missing_keywords = [] - partial_matches = [] - - for kw in keywords: - keyword = kw.get('keyword', '') - importance = kw.get('importance', 5) - category = kw.get('category', 'Uncategorized') - - # Check for exact matches (case insensitive) - if re.search(r'\b' + re.escape(keyword) + r'\b', self.preprocessed_cv, re.IGNORECASE): - matched_importance += importance - matched_keywords.append({"keyword": keyword, "importance": importance, "category": category, "match_type": "exact"}) - - # Check for partial matches (for multi-word keywords) - elif len(keyword.split()) > 1: - # For multi-word keywords, check if at least 70% of the words are present - words = keyword.lower().split() - matches = 0 - for word in words: - if re.search(r'\b' + re.escape(word) + r'\b', self.preprocessed_cv.lower()): - matches += 1 - - match_percentage = matches / len(words) - if match_percentage >= 0.7: # At least 70% of words match - partial_value = importance * match_percentage - matched_importance += partial_value - partial_matches.append({"keyword": keyword, "importance": importance, - "category": category, "match_type": "partial", - "match_percentage": f"{match_percentage:.0%}"}) - else: - missing_keywords.append({"keyword": keyword, "importance": importance, "category": category}) - else: - missing_keywords.append({"keyword": keyword, "importance": importance, "category": category}) - - # Calculate ATS score (0-100) - if total_importance > 0: - ats_score = (matched_importance / total_importance) * 100 - else: - ats_score = 0 - - # Group keywords by category for better reporting - matched_by_category = {} - partial_by_category = {} - missing_by_category = {} - - for kw in matched_keywords: - category = kw['category'] - if category not in matched_by_category: - matched_by_category[category] = [] - matched_by_category[category].append(kw) - - for kw in partial_matches: - category = kw['category'] - if category not in partial_by_category: - partial_by_category[category] = [] - 
partial_by_category[category].append(kw) - - for kw in missing_keywords: - category = kw['category'] - if category not in missing_by_category: - missing_by_category[category] = [] - missing_by_category[category].append(kw) - - # Generate detailed report - report = f"""## ATS Simulation Results - -### Overall ATS Score: {ats_score:.1f}/100 - -""" - - # Add matched keywords section - report += "### Exact Keyword Matches\n\n" - for category, keywords in matched_by_category.items(): - report += f"**{category}**:\n" - for kw in sorted(keywords, key=lambda x: x['importance'], reverse=True): - report += f"- {kw['keyword']} (Importance: {kw['importance']}/10)\n" - report += "\n" - - # Add partial matches section - if partial_matches: - report += "### Partial Keyword Matches\n\n" - for category, keywords in partial_by_category.items(): - report += f"**{category}**:\n" - for kw in sorted(keywords, key=lambda x: x['importance'], reverse=True): - report += f"- {kw['keyword']} (Importance: {kw['importance']}/10, Match: {kw['match_percentage']})\n" - report += "\n" - - # Add missing keywords section - report += "### Missing Keywords\n\n" - for category, keywords in missing_by_category.items(): - report += f"**{category}**:\n" - for kw in sorted(keywords, key=lambda x: x['importance'], reverse=True): - report += f"- {kw['keyword']} (Importance: {kw['importance']}/10)\n" - report += "\n" - - # Add ATS passage likelihood with more detailed assessment - if ats_score >= 85: - passage = "Very high likelihood of passing ATS filters - Resume is extremely well-matched to this job" - elif ats_score >= 70: - passage = "High likelihood of passing ATS filters - Resume is well-matched to this job" - elif ats_score >= 55: - passage = "Moderate likelihood of passing ATS filters - Resume has adequate matching but could be improved" - elif ats_score >= 40: - passage = "Low likelihood of passing ATS filters - Resume needs significant improvements for this job" - else: - passage = "Very low likelihood of passing ATS filters - Resume is not well-matched to this job" - - report += f"### ATS Passage Assessment\n\n{passage}\n\n" - - # Add specific recommendations based on missing keywords - report += "### Key Recommendations\n\n" - - # Get top 5 missing keywords by importance - top_missing = sorted(missing_keywords, key=lambda x: x.get('importance', 0), reverse=True)[:5] - if top_missing: - report += "Consider adding these high-importance missing keywords to your resume:\n" - for kw in top_missing: - report += f"- {kw['keyword']} (Importance: {kw['importance']}/10, Category: {kw['category']})\n" - - report += f"\nScore: {int(ats_score)} points" - - self.analysis_results['ats_simulation'] = report - self.scores['ats_simulation'] = min(100, ats_score) - - def analyze_industry_specific(self): - """ - Perform industry and job role specific analysis - """ - # First, identify the industry and job role - industry_prompt = f""" - Based on the following job description, identify the specific industry and job role. - - JOB DESCRIPTION: - {self.jd_text} - - Format your response as a JSON object with this structure: - {{"industry": "Technology", "job_role": "Software Engineer"}} - - - Be specific about both the industry and job role. 
- """ - - response = self.call_llm(industry_prompt, model=self.model) - - # Parse the JSON response - try: - # Find JSON in the response - json_match = re.search(r'\{\s*"industry"\s*:.+?\}', response, re.DOTALL) - if json_match: - response = json_match.group(0) - - job_info = json.loads(response) - industry = job_info.get('industry', 'General') - job_role = job_info.get('job_role', 'General') - except Exception as e: - print(f"Error parsing industry JSON: {e}") - industry = "Technology" # Default fallback - job_role = "Professional" # Default fallback - - # Now perform industry-specific analysis - industry_analysis_prompt = f""" - Analyze this resume for a {job_role} position in the {industry} industry. - - JOB DESCRIPTION: - {self.jd_text} - - RESUME: - {self.preprocessed_cv} - - Please provide an industry-specific analysis considering: - 1. Industry-specific terminology and keywords in the resume - 2. Relevant industry experience and understanding - 3. Industry-specific certifications and education - 4. Industry trends awareness - 5. Industry-specific achievements and metrics - - - For each point, evaluate how well the resume demonstrates industry alignment. - Provide specific recommendations for improving industry relevance. - - - End your analysis with "Score: XX points" where XX is a score from 0-100 representing how well the resume aligns with this specific industry and role. - """ - - response = self.call_llm(industry_analysis_prompt, model=self.model) - score = self.extract_score(response) - - self.analysis_results['industry_specific'] = response - self.scores['industry_specific'] = score - - def suggest_resume_improvements(self): - """ - Generate specific suggestions to improve the resume for this job - """ - prompt = f""" - Based on the comprehensive analysis of this resume against the job description, provide specific, actionable improvements. - - JOB DESCRIPTION: - {self.jd_text} - - RESUME: - {self.preprocessed_cv} - - ANALYSIS RESULTS: - Keywords Analysis: {self.scores.get('keywords', 'N/A')}/100 - Experience Match: {self.scores.get('experience', 'N/A')}/100 - Format & Readability: {self.scores.get('format', 'N/A')}/100 - Content Quality: {self.scores.get('content', 'N/A')}/100 - Errors & Consistency: {self.scores.get('errors', 'N/A')}/100 - ATS Simulation: {self.scores.get('ats_simulation', 'N/A')}/100 - Industry Alignment: {self.scores.get('industry_specific', 'N/A')}/100 - - - Please provide specific, actionable improvements in these categories: - - - 1. CRITICAL ADDITIONS: Keywords and qualifications that must be added - 2. CONTENT ENHANCEMENTS: How to strengthen existing content - 3. FORMAT IMPROVEMENTS: Structural changes to improve ATS compatibility - 4. REMOVAL SUGGESTIONS: Content that should be removed or de-emphasized - 5. SECTION-BY-SECTION RECOMMENDATIONS: Specific improvements for each resume section - - - For each suggestion, provide a clear before/after example where possible. - Focus on the most impactful changes that will significantly improve ATS performance and human readability. - """ - - response = self.call_llm(prompt, model=self.model) - self.improvement_suggestions = response - return response - - def analyze_competitive_position(self): - """ - Analyze the competitive position of this resume in the current job market - """ - prompt = f""" - Analyze how competitive this resume would be in the current job market for this position. 
- - JOB DESCRIPTION: - {self.jd_text} - - RESUME: - {self.preprocessed_cv} - - Please provide a competitive analysis including: - - - 1. MARKET COMPARISON: How this resume compares to typical candidates for this role - 2. STANDOUT STRENGTHS: The most impressive qualifications compared to the average candidate - 3. COMPETITIVE WEAKNESSES: Areas where the candidate may fall behind competitors - 4. DIFFERENTIATION FACTORS: Unique elements that set this resume apart (positively or negatively) - 5. HIRING PROBABILITY: Assessment of the likelihood of getting an interview (Low/Medium/High) - - - Base your analysis on current job market trends and typical qualifications for this role and industry. - Be honest but constructive in your assessment. - - - End with a competitive score from 0-100 representing how well this resume would compete against other candidates. - """ - - response = self.call_llm(prompt, model=self.model) - score = self.extract_score(response) - - self.analysis_results['competitive'] = response - self.scores['competitive'] = score - return response - - def generate_optimized_resume(self): - """ - Generate an optimized version of the resume tailored to the job description - """ - prompt = f""" - Create an optimized version of this resume specifically tailored for the job description. - - JOB DESCRIPTION: - {self.jd_text} - - CURRENT RESUME: - {self.preprocessed_cv} - - Please rewrite the resume to: - 1. Incorporate all relevant keywords from the job description - 2. Highlight the most relevant experience and qualifications - 3. Use ATS-friendly formatting and structure - 4. Quantify achievements where possible - 5. Remove or downplay irrelevant information - - - The optimized resume should maintain truthfulness while presenting the candidate in the best possible light for this specific position. - Use standard resume formatting with clear section headers. 
- """ - - response = self.call_llm(prompt, model=self.model) - self.optimized_resume = response - return response - - def generate_final_score_and_recommendations(self): - """ - Generate final score with weighted categories and overall recommendations - Adjusted to give more weight to JD-specific factors - """ - # Define weights for different categories with higher emphasis on JD-specific factors - weights = { - 'ats_simulation': 0.30, # Direct ATS simulation is most important - 'keywords': 0.25, # Keywords are critical for ATS - 'experience': 0.20, # Experience match is very important - 'industry_specific': 0.15, # Industry relevance - 'content': 0.05, # Content quality - 'format': 0.03, # Format and readability - 'errors': 0.02, # Errors and consistency - } - - # Calculate weighted score - weighted_sum = 0 - used_weights_sum = 0 - category_scores = {} - - for category, weight in weights.items(): - if category in self.scores: - score = self.scores[category] - weighted_sum += score * weight - used_weights_sum += weight - category_scores[category] = score - - # Calculate final score - if used_weights_sum > 0: - final_score = weighted_sum / used_weights_sum - else: - final_score = 0 - - self.scores['final'] = final_score - - # Prepare JD analysis summary for the prompt - jd_summary = "" - if self.jd_analysis: - jd_summary = "JOB DESCRIPTION ANALYSIS:\n" - if self.jd_analysis.get('required_qualifications'): - jd_summary += "Required Qualifications: " + ", ".join(self.jd_analysis.get('required_qualifications')[:5]) + "\n" - if self.jd_analysis.get('technical_skills'): - jd_summary += "Technical Skills: " + ", ".join(self.jd_analysis.get('technical_skills')[:5]) + "\n" - if self.jd_analysis.get('key_responsibilities'): - jd_summary += "Key Responsibilities: " + ", ".join(self.jd_analysis.get('key_responsibilities')[:3]) + "\n" - - # Generate final recommendations - prompt = f""" - Based on the comprehensive analysis of this resume against the job description, provide a final assessment and recommendations. - - {jd_summary} - - RESUME ANALYSIS SCORES: - ATS Simulation Score: {category_scores.get('ats_simulation', 'N/A')}/100 (30% of final score) - Keywords Match: {category_scores.get('keywords', 'N/A')}/100 (25% of final score) - Experience Match: {category_scores.get('experience', 'N/A')}/100 (20% of final score) - Industry Alignment: {category_scores.get('industry_specific', 'N/A')}/100 (15% of final score) - Content Quality: {category_scores.get('content', 'N/A')}/100 (5% of final score) - Format & Readability: {category_scores.get('format', 'N/A')}/100 (3% of final score) - Errors & Consistency: {category_scores.get('errors', 'N/A')}/100 (2% of final score) - - FINAL WEIGHTED SCORE: {final_score:.1f}/100 - - Please provide a detailed final assessment with: - - 1. EXECUTIVE SUMMARY: A concise summary of how well this resume matches this specific job description - - 2. STRENGTHS: The top 3 strengths of this resume for this specific job - - 3. CRITICAL IMPROVEMENTS: The top 3 most critical improvements needed to better match this job description - - 4. ATS ASSESSMENT: An assessment of the resume's likelihood of passing ATS filters for this specific job - - 5. INTERVIEW POTENTIAL: An assessment of whether this resume would likely lead to an interview - - 6. 
FINAL RECOMMENDATION: A clear verdict on whether the candidate should: - a) Apply with this resume as is - b) Make minor improvements before applying - c) Make major improvements before applying - - Be specific about which improvements would have the biggest impact on ATS performance for this particular job. - """ - - response = self.call_llm(prompt, model=self.model) - self.final_report = f"Final ATS Score for This Job: {final_score:.1f}/100\n\n{response}" - - def generate_visual_report(self, output_path="html/ats_report.html"): - """ - Generate a visual HTML report with charts and formatted analysis - - Args: - output_path (str): Path to save the HTML report - - Returns: - str: Path to the generated report - """ - try: - # Ensure the output directory exists - output_dir = os.path.dirname(output_path) - if output_dir and not os.path.exists(output_dir): - os.makedirs(output_dir, exist_ok=True) - - # Create radar chart for scores - categories = [ - 'Keywords', 'Experience', 'ATS Simulation', - 'Industry Fit', 'Content Quality', 'Format', 'Errors' - ] - - values = [ - self.scores.get('keywords', 0), - self.scores.get('experience', 0), - self.scores.get('ats_simulation', 0), - self.scores.get('industry_specific', 0), - self.scores.get('content', 0), - self.scores.get('format', 0), - self.scores.get('errors', 0) - ] - - # Create radar chart - fig = plt.figure(figsize=(10, 6)) - ax = fig.add_subplot(111, polar=True) - - # Calculate angles for each category - angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist() - - # Close the plot - values.append(values[0]) - angles.append(angles[0]) - categories.append(categories[0]) - - # Plot data - ax.plot(angles, values, 'o-', linewidth=2) - ax.fill(angles, values, alpha=0.25) - ax.set_thetagrids(np.degrees(angles[:-1]), categories[:-1]) - ax.set_ylim(0, 100) - plt.title('Resume ATS Analysis Results', size=15) - - # Convert plot to base64 for embedding in HTML - buffer = BytesIO() - plt.savefig(buffer, format='png') - buffer.seek(0) - img_str = base64.b64encode(buffer.read()).decode() - plt.close() - - # Import markdown to HTML converter - try: - import markdown - markdown_available = True - except ImportError: - print("Warning: markdown package not installed. Markdown formatting will not be rendered.") - print("Install with: pip install markdown") - markdown_available = False - - # Function to convert markdown to HTML - def md_to_html(text): - if markdown_available: - # Convert markdown to HTML - try: - return markdown.markdown(text) - except: - return f"
    [The HTML markup inside these deleted report-template strings did not survive extraction;
    only its structure is recoverable. The md_to_html fallbacks return {text} wrapped in lost
    markup, and html_content is an f-string "ATS Analysis Report" page containing: a
    "Resume ATS Analysis Report" header with "Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}";
    a "Final ATS Score" panel showing {self.scores.get('final', 0):.1f}/100, captioned "This score
    represents the overall effectiveness of your resume for this specific job."; a "Score Breakdown"
    section embedding the radar chart image ("ATS Analysis Chart"); md_to_html-rendered sections for
    "Executive Summary" (self.final_report), "ATS Simulation Results", and "Recommended Improvements"
    (self.improvement_suggestions); a "Detailed Analysis" block with per-category /100 scores for
    Keywords Match, Experience & Qualifications, Format & Readability, Content Quality,
    Errors & Consistency, and Industry Alignment; a "Competitive Analysis" section; and a
    "Usage Statistics" footer listing LLM API Calls ({self.llm_call_count}), Total Tokens Used
    ({self.total_tokens}), and Analysis Time ({self.total_time:.2f} seconds).]
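    # Editorial sketch (an assumption, not the author's lost markup): the radar chart encoded
    # above as img_str via base64.b64encode(buffer.read()).decode() is commonly embedded in
    # such a template with a base64 data URI, e.g.
    #     <img src="data:image/png;base64,{img_str}" alt="ATS Analysis Chart"/>
    # Only the "ATS Analysis Chart" alt text is recoverable from the original template.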
    - - - """ - - # Write HTML to file - with open(output_path, 'w', encoding='utf-8') as f: - f.write(html_content) - - return output_path - - except Exception as e: - print(f"Error generating visual report: {e}") - return None - - def generate_text_report(self): - """ - Generate a text-based report of the analysis - - Returns: - str: Formatted text report - """ - report = "=== ATS ANALYSIS REPORT ===\n\n" - - # Add final score - report += f"FINAL SCORE: {self.scores.get('final', 0):.1f}/100\n\n" - - # Add individual scores - report += "SCORE BREAKDOWN:\n" - report += f"- Keywords Match: {self.scores.get('keywords', 0)}/100\n" - report += f"- Experience Match: {self.scores.get('experience', 0)}/100\n" - report += f"- ATS Simulation: {self.scores.get('ats_simulation', 0)}/100\n" - report += f"- Format & Readability: {self.scores.get('format', 0)}/100\n" - report += f"- Content Quality: {self.scores.get('content', 0)}/100\n" - report += f"- Errors & Consistency: {self.scores.get('errors', 0)}/100\n" - report += f"- Industry Alignment: {self.scores.get('industry_specific', 0)}/100\n\n" - - # Add final report - report += "EXECUTIVE SUMMARY:\n" - report += f"{self.final_report}\n\n" - - # Add improvement suggestions - report += "RECOMMENDED IMPROVEMENTS:\n" - report += f"{self.improvement_suggestions}\n\n" - - # Add usage statistics - report += "USAGE STATISTICS:\n" - report += f"- LLM API Calls: {self.llm_call_count}\n" - report += f"- Total Tokens Used: {self.total_tokens}\n" - report += f"- Analysis Time: {self.total_time:.2f} seconds\n" - - return report - - def call_llm(self, prompt, model=1): - """ - Call the LLM API with the given prompt - - Args: - prompt (str): The prompt to send to the LLM - model (int): Model selection (1=OpenAI, 2=Groq) - - Returns: - str: The LLM response - """ - try: - # Check if .env file exists and load it - env_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env') - if not os.path.exists(env_file_path): - print(f"Warning: .env file not found at {env_file_path}") - print("Creating a default .env file with placeholders for API keys") - with open(env_file_path, 'w') as f: - f.write("# API Keys for ATS Analyzer\n") - f.write("# Replace with your actual API keys\n\n") - f.write("# OpenAI API Key\n") - f.write("OPENAI_API_KEY=your_openai_api_key_here\n\n") - f.write("# Groq API Key (optional, only needed if using model=2)\n") - f.write("GROQ_API_KEY=your_groq_api_key_here\n") - print(f"Please edit {env_file_path} and add your API keys") - - # Fallback to dummy response if no API keys are available - return self._generate_dummy_response(prompt) - - # Reload environment variables - from dotenv import load_dotenv - load_dotenv(env_file_path) - - if model == 1: - # Get OpenAI API key from environment variables - openai_api_key = OPENAI_API_KEY - if not openai_api_key or openai_api_key == "your_openai_api_key_here": - print("Error: OpenAI API key not found or not set in .env file") - print(f"Please edit {env_file_path} and add your OpenAI API key") - # Fallback to model 2 if OpenAI API key is not available - if model == 1: - print("Attempting to use Groq API instead...") - return self.call_llm(prompt, model=2) - else: - return self._generate_dummy_response(prompt) - - client = openai.OpenAI(api_key=openai_api_key) - response = client.chat.completions.create( - model="gpt-4o-mini", # Can be configured as a parameter - messages=[ - {"role": "system", "content": "You are an expert resume analyst and ATS specialist."}, - {"role": "user", "content": 
prompt} - ], - temperature=0.1, - max_tokens=1500 - ) - - self.llm_call_count += 1 - self.total_tokens += response.usage.total_tokens - return response.choices[0].message.content.strip() - - elif model == 2: - try: - from groq import Groq - except ImportError: - print("Error: Groq package not installed. Please install it with 'pip install groq'") - print("Falling back to OpenAI API...") - return self.call_llm(prompt, model=1) - - # Get Groq API key from environment variables - groq_api_key = GROQ_API_KEY - if not groq_api_key or groq_api_key == "your_groq_api_key_here": - print("Error: Groq API key not found or not set in .env file") - print(f"Please edit {env_file_path} and add your Groq API key") - print("Falling back to OpenAI API...") - return self.call_llm(prompt, model=1) - - client = Groq(api_key=groq_api_key) - completion = client.chat.completions.create( - model="meta-llama/llama-4-maverick-17b-128e-instruct", - messages=[ - { - "role": "system", - "content": "You are an expert resume analyst and ATS specialist." - }, - { - "role": "user", - "content": prompt - } - ], - temperature=0.1, - max_completion_tokens=1500, - top_p=1, - stream=False, - stop=None, - ) - - self.llm_call_count += 1 - self.total_tokens += completion.usage.total_tokens - return completion.choices[0].message.content.strip() - - else: - return "Error: Invalid model selection" - - except Exception as e: - print(f"Error calling LLM API: {e}") - # If there's an error with the API call, generate a dummy response - return self._generate_dummy_response(prompt) - - def _generate_dummy_response(self, prompt): - """ - Generate a dummy response when API calls fail - This is used for testing or when API keys are not available - - Args: - prompt (str): The original prompt - - Returns: - str: A dummy response - """ - print("Generating dummy response for testing purposes...") - - # Check what kind of analysis is being requested - if "keywords" in prompt.lower(): - return "This is a dummy keywords analysis.\n\nThe resume contains some keywords that match the job description, but could be improved by adding more specific technical skills and qualifications.\n\nScore: 65 points" - elif "experience" in prompt.lower(): - return "This is a dummy experience analysis.\n\nThe candidate's experience partially matches the job requirements. 
Some areas could be strengthened to better align with the position.\n\nScore: 70 points" - elif "format" in prompt.lower(): - return "This is a dummy format analysis.\n\nThe resume has a clean format but could be improved with better section organization and more consistent formatting.\n\nScore: 75 points" - elif "content" in prompt.lower(): - return "This is a dummy content quality analysis.\n\nThe content is generally good but could use more quantifiable achievements and specific examples.\n\nScore: 68 points" - elif "errors" in prompt.lower(): - return "This is a dummy errors analysis.\n\nThe resume has few grammatical errors but some inconsistencies in formatting and punctuation.\n\nScore: 80 points" - elif "industry" in prompt.lower(): - return "This is a dummy industry analysis.\n\nThe resume shows good industry alignment but could benefit from more industry-specific terminology.\n\nScore: 72 points" - elif "competitive" in prompt.lower(): - return "This is a dummy competitive analysis.\n\nThe resume is competitive but could be strengthened in areas of technical expertise and project outcomes.\n\nScore: 70 points" - elif "improvements" in prompt.lower(): - return "This is a dummy improvement suggestions.\n\n1. Add more technical keywords from the job description\n2. Quantify achievements with specific metrics\n3. Improve formatting for better ATS readability" - elif "final assessment" in prompt.lower(): - return "This is a dummy final assessment.\n\nThe resume is generally well-aligned with the job description but has room for improvement in keyword matching and experience presentation.\n\nFinal recommendation: Make minor improvements before applying." - else: - return "This is a dummy response for testing purposes. In a real scenario, this would contain a detailed analysis based on your prompt.\n\nScore: 70 points" - - def extract_score(self, response_text): - """ - Extract score from LLM response - - Args: - response_text (str): LLM response text - - Returns: - int: Extracted score (0-100) - """ - import re - - # Look for score in format "Score: XX points" or similar patterns - patterns = [ - r'Score:\s*(\d+)\s*points', - r'Score:\s*(\d+)', - r'score of\s*(\d+)', - r'rated at\s*(\d+)', - r'(\d+)/100', - r'(\d+)\s*out of\s*100' - ] - - for pattern in patterns: - match = re.search(pattern, response_text, re.IGNORECASE) - if match: - score = int(match.group(1)) - # Ensure score is in range 0-100 - return max(0, min(100, score)) - - # Default score if no match found - return 50 - - def run_full_analysis(self, advanced=True, generate_html=True): - """ - Run the complete resume analysis - - Args: - advanced (bool): Whether to run advanced analyses - generate_html (bool): Whether to generate HTML report - - Returns: - str: Path to the report or text report - """ - start_time = time.time() - - print("Starting ATS analysis for this specific job description...") - - # Extract and preprocess resume text (this also analyzes the JD) - self.extract_and_preprocess() - - print(f"Analyzing resume against {len(self.jd_keywords)} job-specific keywords...") - - # Run basic analyses - self.analyze_keywords() - self.analyze_experience_and_qualifications() - self.analyze_format_and_readability() - self.analyze_content_quality() - self.check_errors_and_consistency() - - # Run advanced analyses if requested - if advanced: - print("Running advanced ATS simulation...") - self.simulate_ats_filtering() - self.analyze_industry_specific() - self.analyze_competitive_position() - - # Generate improvement 
suggestions
-        print("Generating job-specific improvement suggestions...")
-        self.suggest_resume_improvements()
-
-        # Generate final score and report
-        print("Calculating final ATS score for this job...")
-        self.generate_final_score_and_recommendations()
-
-        # Record total time
-        self.total_time = time.time() - start_time
-        print(f"Analysis completed in {self.total_time:.1f} seconds")
-
-        # Print usage statistics to console
-        self.print_usage_statistics()
-
-        # Generate and return report
-        if generate_html:
-            print("Generating visual HTML report...")
-            report_path = self.generate_visual_report()
-            print(f"HTML report generated: {report_path}")
-            return report_path
-        else:
-            return self.generate_text_report()
-
-    def print_usage_statistics(self):
-        """
-        Print usage statistics to console
-        """
-        print("\n===== USAGE STATISTICS =====")
-        print(f"LLM API Calls: {self.llm_call_count}")
-        print(f"Total Tokens Used: {self.total_tokens}")
-        print(f"Analysis Time: {self.total_time:.2f} seconds")
-
-        print("\n===== SCORE BREAKDOWN =====")
-        print(f"Final ATS Score: {self.scores.get('final', 0):.1f}/100")
-        print(f"Keywords Match: {self.scores.get('keywords', 0)}/100")
-        print(f"Experience Match: {self.scores.get('experience', 0)}/100")
-        print(f"ATS Simulation: {self.scores.get('ats_simulation', 0)}/100")
-        print(f"Format & Readability: {self.scores.get('format', 0)}/100")
-        print(f"Content Quality: {self.scores.get('content', 0)}/100")
-        print(f"Errors & Consistency: {self.scores.get('errors', 0)}/100")
-        print(f"Industry Alignment: {self.scores.get('industry_specific', 0)}/100")
-        print("============================\n")
diff --git a/backend/main.py b/backend/main.py
index 2e2cc8b..80c481c 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -19,7 +19,7 @@
 # from langfuse import Langfuse
 # from langfuse.callback import CallbackHandler
 
-from ats_analyzer_improved import ATSAnalyzer
+from ATS_agent.ats_analyzer_improved import ATSAnalyzer
 
 # Cache store
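Note on the import change above: backend/main.py now imports ATSAnalyzer from an ATS_agent
package rather than a top-level module, which suggests ats_analyzer_improved.py now lives
inside an ATS_agent package directory on the application's import path. A minimal usage
sketch under that assumption follows; the directory layout, the __init__.py, and the
constructor arguments are illustrative guesses, since only the import path and
run_full_analysis() are visible in this patch.

    # Assumed layout (hypothetical; inferred only from the new import path):
    #   main.py                        # from ATS_agent.ats_analyzer_improved import ATSAnalyzer
    #   ATS_agent/
    #       __init__.py                # assumed: makes ATS_agent a regular package
    #       ats_analyzer_improved.py   # defines ATSAnalyzer
    from ATS_agent.ats_analyzer_improved import ATSAnalyzer

    analyzer = ATSAnalyzer(...)  # hypothetical: constructor arguments are defined earlier in the file, outside this hunk
    result = analyzer.run_full_analysis(advanced=True, generate_html=True)
    print(result)  # path to the HTML report, or the text report when generate_html=False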