From b14a396b91e261c964cf1360bd024a710d9869c9 Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Sat, 5 Apr 2025 06:05:30 +0000 Subject: [PATCH 1/5] Implement QueryRefinedAgainChain for enhanced query refinement process --- llm_utils/chains.py | 57 +++++++++++++++++++++++++++++++++++++++++++++ llm_utils/graph.py | 25 ++++++++++++++++++-- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/llm_utils/chains.py b/llm_utils/chains.py index d9e5e6c..271dc29 100644 --- a/llm_utils/chains.py +++ b/llm_utils/chains.py @@ -72,6 +72,61 @@ def create_query_refiner_chain(llm): return tool_choice_prompt | llm +# QueryRefinedAgainChain +def create_query_redefined_again_chain(llm): + query_redefined_again_prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + """ + 당신은 데이터 분석 전문가(데이터 분석가 페르소나)입니다. + 사용자의 질문과 이미 구체화된 질문을 바탕으로, 실제 사용 가능한 테이블과 컬럼 정보를 검토하여 + 더욱 정교하게 질문을 재정의해 주세요. + + 주의사항: + - 이전에 구체화된 질문을 기반으로 하되, 실제 DB 환경에서 사용 가능한 테이블/컬럼을 고려해 현실적인 분석 방향을 제시하세요. + - 불필요한 재질문 없이, 주어진 데이터로 최대한 분석 가능한 형태로 질문을 구체화하세요. + - 테이블 구조에 맞게 분석 질문을 조정하고, 필요한 가정을 추가하세요. + - 최종 출력 형식은 반드시 아래와 같아야 합니다. + + 최종 형태 예시: + + <최종 구체화된 질문> + ``` + 최근 30일간 결제 금액이 10만원 이상인 사용자들의 서비스 이용 패턴과 이탈율을 분석하여, + 어떤 활동 패턴을 보이는 고액 결제자가 이탈하는지 파악 + ``` + + <분석 접근 방향> + ``` + 1. subscription_activities와 contract_activities 테이블을 조인하여 고액 결제자 식별 + 2. 해당 사용자들의 activity_type 분포 확인 + 3. 이탈 사용자(30일 이상 미접속)와 활성 사용자의 행동 패턴 비교 분석 + 4. 주요 이탈 지점 식별 + ``` + """, + ), + ( + "system", + "아래는 사용자의 원래 질문 및 1차 구체화된 질문입니다:", + ), + MessagesPlaceholder(variable_name="user_input"), + MessagesPlaceholder(variable_name="refined_input"), + ( + "system", + "다음은 사용자의 DB 환경정보와 실제 사용 가능한 테이블 및 컬럼 정보입니다:", + ), + MessagesPlaceholder(variable_name="user_database_env"), + MessagesPlaceholder(variable_name="searched_tables"), + ( + "system", + "위 정보를 바탕으로 DB 구조에 맞게 더욱 구체화된 최종 질문과 분석 접근 방향을 최종 형태 예시와 같은 형식으로 작성해주세요.", + ), + ] + ) + return query_redefined_again_prompt | llm + + # QueryMakerChain def create_query_maker_chain(llm): query_maker_prompt = ChatPromptTemplate.from_messages( @@ -110,6 +165,7 @@ def create_query_maker_chain(llm): ), MessagesPlaceholder(variable_name="user_input"), MessagesPlaceholder(variable_name="refined_input"), + MessagesPlaceholder(variable_name="refined_input_again"), ( "system", "다음은 사용자의 db 환경정보와 사용 가능한 테이블 및 컬럼 정보입니다:", @@ -126,4 +182,5 @@ def create_query_maker_chain(llm): query_refiner_chain = create_query_refiner_chain(llm) +query_redefined_again_chain = create_query_redefined_again_chain(llm) query_maker_chain = create_query_maker_chain(llm) diff --git a/llm_utils/graph.py b/llm_utils/graph.py index 0aef51d..c65e75f 100644 --- a/llm_utils/graph.py +++ b/llm_utils/graph.py @@ -10,6 +10,7 @@ from llm_utils.chains import ( query_refiner_chain, + query_redefined_again_chain, query_maker_chain, ) @@ -17,6 +18,7 @@ # 노드 식별자 정의 QUERY_REFINER = "query_refiner" +QUERY_REFINED_AGAIN = "query_redefined_again" GET_TABLE_INFO = "get_table_info" TOOL = "tool" TABLE_FILTER = "table_filter" @@ -30,6 +32,7 @@ class QueryMakerState(TypedDict): searched_tables: dict[str, dict[str, str]] best_practice_query: str refined_input: str + refined_input_again: str generated_query: str @@ -90,6 +93,20 @@ def get_table_info_node(state: QueryMakerState): return state +def query_redefined_again_node(state: QueryMakerState): + res = query_redefined_again_chain.invoke( + input={ + "user_input": [state["messages"][0].content], + "refined_input": [state["refined_input"]], + "user_database_env": [state["user_database_env"]], + "searched_tables": [json.dumps(state["searched_tables"])], + } + ) + state["refined_input_again"] = res + print(state["refined_input_again"]) + return state + + # 노드 함수: QUERY_MAKER 노드 def query_maker_node(state: QueryMakerState): res = query_maker_chain.invoke( @@ -121,7 +138,9 @@ def query_maker_node_with_db_guide(state: QueryMakerState): res = chain.invoke( input={ "input": "\n\n---\n\n".join( - [state["messages"][0].content] + [state["refined_input"].content] + [state["messages"][0].content] + # + [state["refined_input"].content] + + [state["refined_input_again"].content] ), "table_info": [json.dumps(state["searched_tables"])], "top_k": 10, @@ -143,10 +162,12 @@ def query_maker_node_with_db_guide(state: QueryMakerState): builder.add_node( QUERY_MAKER, query_maker_node_with_db_guide ) # query_maker_node_with_db_guide +builder.add_node(QUERY_REFINED_AGAIN, query_redefined_again_node) # 기본 엣지 설정 builder.add_edge(QUERY_REFINER, GET_TABLE_INFO) -builder.add_edge(GET_TABLE_INFO, QUERY_MAKER) +builder.add_edge(GET_TABLE_INFO, QUERY_REFINED_AGAIN) +builder.add_edge(QUERY_REFINED_AGAIN, QUERY_MAKER) # QUERY_MAKER 노드 후 종료 builder.add_edge(QUERY_MAKER, END) From fa9652eae8aa4d3c313ae71508454175d3ff8104 Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Sat, 5 Apr 2025 06:06:16 +0000 Subject: [PATCH 2/5] Remove obsolete binary files --- table_info_db/index.faiss | Bin 30765 -> 0 bytes table_info_db/index.pkl | Bin 2920 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 table_info_db/index.faiss delete mode 100644 table_info_db/index.pkl diff --git a/table_info_db/index.faiss b/table_info_db/index.faiss deleted file mode 100644 index 2ece6ffdfe921fd6486326b0b6b094416bb9e040..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 30765 zcmXtA2V9Qd+iz(IMWi7mDiX=8`&=g(*(+r46|!e`O1qSVh6ZKtUC+6W2tO-(%Z%(T zd%f4`ec%7*^ZWRh=eh55&UIbi_1urHA)T7*8SCj)()<7aS^nSu`QJbF_0^Ac|Mzzv zi(W%OZ%>}Jw!e74{|8jjZ^4Z$!&&Fco=|7f%L4jYeE4%8%h$aI?^hW(q{|@QdYvzL z?v8?Sfm^u6&F1VxzYJ0Tp1WLr@guW(HHmkM=ng$TSh2z3-SSV>d8gakbviUUyGE{P z)P-qY#Ypdw2j4g1HkXD9Xm=E@e(j(w>v$9T2ieKg@A~-oQ5@K&yo5!~vI~l@+Trb- z4q!jN0UAgb-p;j|^n80%+cqj6UpKI2+Rp^=+3^LckLo4oMmowCae6qlxCz@n?vEA~ z@=>$T8_Kc%DVT0QP!7H6CP#(r#xUCfIQm2lG}|A8Ych90j?)Jix*BA%d2LSjm$&NH z^%>SIuyd}c}-;G91r=crwylih-m&#+&{Jj>Gx=8 z8Vfi4r(!_wJ(xOSDU3aROT8bcM$jBypiBJ{Tsy``__Wf49=jTX=UZPn^UW$*rDK-v zbyj=!rSuZi{JIuKHt8nMeA$3kc09yhdd7V5B1743QU`I21O9s-mBpM&B*}^t98zW!U$WM$o-Lv2l7rKt&LlU?gtN>Lg{hGgdaKCsN(GU zH}I5Wdp6~zjwfK|rcWaE`*N6K;>4G<9EX0Roq0%BEL2%^59SbRE^pSe+R=?g(S?pIaq11-*Db?&eoPyjYCay~>+Xcs z>y8RFN7s#8^BRpT(aHBEOf@gly4-GvO$N?Gnhlu?iL8ywO7T244h}w93~mLv(EUX^ zUxiB{v-=GBIBg1~|L!E4HuTa3|C)u%&h3Pahx2jAt83Ws`w-NnHS)O?_z(vDnGI`d z-@yMGW@z*ulsmLXkK~E`+oyI=W5<5z;dx&C4y-Osw! zM^0|j<#mjfdsaNw-gqR7lLJQq%|BMZJD-p3xw~Q>@;aK*+P8|{r&jAMZ0qv2>uajH zf$Gm12-?4>Gc{9$nvLT_ZCA_wW1ms48*7IS9uc%Zc4mKTu6p%Sy^j3xkVD$+L*oS1 z8=h<%fg6JR1O1HNTbc^EP#Ce>-yKzLpFF*ck274G7E#5h*4ozksCaaJ1V81@3m&lY8D6N~ zmwGoES2Q{Uqd9B-gjSbVU7{eSnIEaO#1c#7Nifr*}ucl_g19z3;Y5p#WQg0 zgEg$~!JayrE&kKr5l0^x53Vm>fa|nEL2HA$OA?W=g(W;GzdP>PSTVRqJpJ5^KZrO48-lN6zdPMv-l+jtTq{!kG-wG?e>=+; z$tyJL6PfVe#*VVMM@O)K*i%yPRNxA&lb3wX(dhNu{^137JnRv~UF)pPTbU}n7F9#z zfh`2_ftLHDzI+yzjF!V;Yd`+E!fs*FKMBt4Ka4A0S@PJPGo{Nm zFLgG!yjFF&&(58PZ`p*ks~Je*L|VfYG+8^DE%~zv0#;6db~%Ij=RqyyYnwN?FGr8A z)0*G#f2-Bp^9>h{e2UAers9vq!-0O*?i*SW&)gK$Z_r3iS?0G-KcJ z(`wfW9&NWXQWW&^=UeOPOLK7HX8X-}T#bJ`_se@I`RpXCC5Az4H62hd@WyNFfUdR) zr@j?5BQR-h9PXImj={^j$|ELgc*CXpkgg%hAH;&fsJ*FC3L}7U3WNV`#>q>JWODl( z*rByEKe{gksb6$dAE2{lH!J_b{m!{mn?`bTpHA%9(hT8NP+uc%#y)2U!G!gLQ1GhxE~yo5FrZ~U?0w2g zUdnhYrd{6*bPm$FR|)K`wnTJ&RF6;T76+a|FMwtjT+3?-wV!uCY&fk6ENi{OHT4bn z9oKX;+dPa@-$LmNBOG_l2noZ~Su37lMJubyE*3#pyYfZM^Xo5truf449d&qFi=T|} zN)D|$0oJ*7M!P5aP++lAVH`F*UWXsjs-e}pOhId6^~WrSJbedj*{7Opq5qZU@HV!o zH-y*vS%SR^o`F&4u zrl=Zare=%NdU@l1Ub0=z9T<5ahY`+mw-y&z>a50`xK42{_gb_PKUKXf&K~}S-mmO= zC-aT?-n$Ptoop+JLv+LeqV3>qtgO5}#->=}X^%%Zsd)}4OgK2{8Yo>e++h&@JbfIy zISyudx+U^M_(ANlhI8U`aqr+yty}#B_?2V`G*f8a>^oe{t_MH;vmyG#4d~pfs-RgG zQHxzT?G>p98J(XX?Zb9_4HbW^gQ?yY=-;U(S6rGtZwB;g%VkbbqR-bREj1qR%#~Kz znz0?N{ry1Ul>adfulkL^&sXomoTZy}4=>w_2|4E6`MMXTwJE@((;D%h=Zr-aT8hhi ztWe=G^%eE1D`}94#Ib>-CC@XxH7-fZ?7c5Q`~B&{l__mFCc3U`7=m$CY3ZTXB1y98kY1inPlbJyTOw-e})zZ^%D zmT5EtJZ`-iFX(lOnbc{AN@vizls<(`l}d5^@EoAOCw<_H;p22*9ok$HFTvr@(_l|h zHA#I1YNiq|>y z+kwZ>?P9@L8&3FyPiN)mrmbSA@Opf}XDDA6jKk{1vzyipIq5jw`sQ3z?^zgJT|O)5 zBNsJuOk7~VhvznxQ##$M(9@AVnmFRy4;O#g$>_Y$WMgeMCfZ44 z9Pdxqd5RH6>WHK8dgw!p3Yp2tQ?LV5O}Lpy3-Qz7pqfoyCRZR~91u4%-&@9#aGamI z-&$#W($>dyv<}>sn@b$F5--&|DcV~5;<4k#nEf~(`i0ZT1gjZ-NZ4F4n`!&TNWxuac??s^2t=Ic;-7s6}h zE3jwzZKgCP^&?xI;KMIwZ@|7arC2}$=Lexqgfo1M*zd$ z_k`W>#$0Z**RJds$>#EtN>ocgV$G^>R#EGt-1#vp@>Jhy2a3OmKMo5@j zp%=dkL-}rY5iILCfb^0tBku>{560?VuDp!@p1Z)qn1#@FNOv|FnXdH33Lsn%gaf2k z|H8`WOABJU?!@jd>}buLHSV_$Nw15M2b>gt7PN7x#@wqcXPe{l$wLkYFK@*!+i+Lt zdQ_ZCGX=9h4B(_ykbE1bS(S?CQ|!%@ewNc6TMDI@Ih~DsaB_+yU!?6HzY?+^djnhCjFYE8 z;-m`SD;)LY+_RM>>1>u&X9u3BvKWfi+^3!;Dfxje9WLpyd zuywmKVdH##{_}J@R2V33KN6(3@XgE3O!beEr6CTfo(>u8udof=ujXD0$*L@`&Uh>4 z$3DgHA*o<|Aoy_27N+qsvc=ER%Q&y;%s|~nbY9wj#U-hyZv1hp7k?^rW=RgwAwXznzVqv z>8pV<0=EBQ8=SSzQQVmn0fwjQ=BKRs3)QM`!l$0Cq?!-o`!P`8(V7z`=qS5j;V$7~ zlbN+7uZU?p1gteb3(!gat_14@|%3$SKo- z@GUMFwRjU279;@e5wCaMAV^mh)R{Diuy2`Y*=HrRA0LF|c_1L!jzym)9vkXdaShVz zBDUv8s26hq2)DVNoge?_z6zWSziIMCUryK{h(nNiiM1(pmXw2$P6{I4x8&!eTEo2V zyM$wV2dVIf{HK`lG*#>IVKt61=!OPuE(yY4!tIGF*WtShuZbCMMzW~$J0uT`-m(aaof=bV6M9`lg0PT~Ydap_fr@*b$} zK{*hn+;*t&4Ynnw>{i_kO3zBd3)**zHgNVJP&$le04e{IXtQ<@3M`$*;E2luEV3CCcrURO{&O#W1mwuaRS1ysN05Haze*wjOx z`=(^ThkM!++<>jT) zLZpog2sa?Sv^G;cMBJ|uuCs z`3#LTGAjO|IbxI_>IkcN?dTmq_oLoy$e$iEZ%tlVWgt+`B9FAmO+kH@KcvfTBwbHA zV5y+}B6&8Im!O%)0sIv8gZ#o39J6?~pzK0NIvNOvIAvxES9MD(WrNC;iL+G(&iqQw z17^PSA@8U>5KxxKtY#cT$~r|i$2*L&8Zq6`T&&#MS>@|Ucu6z;1c|R0&6swss3EGk zKHNS^WnBoNUj)qsdBaBJg>*=s3{A=m&^u(@Wz zSLA+VGpzi02^jN~X@f=o!A-!cRVO(6cDWW>gK+xALgCr)67+uQ^K`GIXVG^r zJr9X*Gc@7XavXoUYy()1_LqJA7RcPa&oFE4cQO57I_$W09muO zV=PQ5=g5S$Q3=>O@)>6LiISxQo5KFT&BT_ERe8@}Yv9SsQDD0_Qy9+}BzjI-$ouwa z?K5!OYM39mL596>mT=ujSU%6ipEJwFpex4G_&{el@Qneti+KvuznRIz>WjsXC0=-Q zyot2CZ7Hw+HwVj|uH%e*39QF8XK3ykrJHu*jxOZjf6#Py6Mibcwj4O*IRfNaM99UD0;@AO|-ifXzdGq3PwTl=u&3B){qaZ{T=Sqv=SF6e{_ESOB_k|W34_^ z6ZJP#7P(XB%U$+!<>dPrs>Vz|?G7Z~J*%U>kj|gIvB>YJxV+L-)f7wK-og2)mi&-; zLrhCC5(R%Y7MvYAfy>&}`GeQhxkJAxyvxw8>bzxE!5k`dDdTlL&eM){mzmHKZK)4#M4?@AKkW27S0v~g@;=S8d>8s1@&WW3tToWRHcvYs!q0qY^nSdWJ*~wVd%m^fJoaCvy&SX52u7#t@eAt{=soTE!p?P| zgILU>pVx%@C$eN=+ZE6|tSQ#oPoH5>Mm6*JDt@(DCIqdYBWo=>0L`D&hU=l>(8AoD zC)fQfdbe$>h20y^4t)BIC#}QL{CEgqU8YRvJO>nJ%-&o}n0LMf6SjJBZABJRJ$VBvg&_w!!G!TesPE_vgdMo*`7u0mYGuLS>g|xuPn&6^ z&-d=`#>|%VhQsqJ%LtERw!m>4n)j;-7uHv$UT7g*%4b2W;d$^`J)F~@qOiaPI`mA% zcQdL1?G5vuT64!KIk>>F5p3<#f(0+zOTRagGcI2S!akoRm4e{YfMVQz!w@^PD`5j4 z`3v`e%jnuL7Yt(4Atd zUC$@!e5^(|#^sYy@^j||xU|g(2As5z9%~k0*2Y#mV(e0U{N0ql-W(|i<0Q>6Pm4Yc z^s`iW{NVK}>A7(R^+6SU-!)L0$9Ce&JFSG954VC%;XT|Lk;!PbKw++KSQmIbwzeGR zH31SGe6)eJLnNI$pY-M~`p!!e?$sWvddi#EUW4xDBnUo|JZb>sx7JT0k$SDY8rRUcIEMo> zZ({Q8(T$SM9d)*YS;YSBx*Ba_1o)v-VV$7f2dinoXK!8u za!4qn*NUv*a17gYfV8qkScw%L=4Omw*d)Rp< z9_0`{X}i}!`hA>(*Le!OH=ZEUTc^R5uB^6d~j zIPw@nj)jBa{;Iq~Mi0B@+SMCWqS#hc?Vh79V(f?@Bz;yh z*)D{aWeoJ5J=T@D_M-R~H-;+=pD@0?)UEyr*UX-3yLKgUkI6Pl41%8`-H} z#?Zy)u4u3@KrE@=K=ciN0r7V%q)Tuz^V$<6mRvk2&f3h^70x>h<@d+Y88+gNOuAsn z;VqE7riLsVG*K6#xd^+y&AE>)z$niwB+LY*TYBz%$*4!vnJWHZ8TOgPfmfl=yqP%X zqN#jz@;11gYa`UTM-^^hb`~eZgsuCKxIz%u!Nuj(IlV_my~EPZAJ*u;l6n$fEwy4k zecl0SF?lh+1OHc8gAu=jX{St}^9IYhH@yiT#F%9(Dmdqdp(U2ze=5p1H|HtkZSYaI zBf21~nm8jZ3#%+Lk=L$zal$S5`NB%gAkseY)8)6g8rZ zztt1fdvuSza4ylBovzJwr1h9(UP~Tai-sXFxH+=1>|Xkq5$-_v1y{|v^;Y`qpo9@e@TM)} zh5n|sNcRN!qG>xf3T=PN@mP22&)7rH`Nz~`NvG8{atn(g?r+dA}L+yuf!eue|aAp2OYtP({ z{)Dt@uKbZzB{^`L6(=9T{uvlZsR-v;4XHj3eNZ{IYnug>)FXO?;`oMkBo-R6Wfv9Aaa~nL_tcdS6Ue8`{;XyLXeO9Iv zLK)|X3~{LaU98mK8Hl@;zG9wb4|Q2(dEh1TaJ1=oPWqf1PNy7kNdzv8H3$D*ePQ&< z6S{R@W{RHg2gz55{DCyC=GO5JWS`qAW_Fv!>5PH+O~jSQiJQ);;w?OYE;n8Q;Uf9V z!%XSyktZ2YUviy`wWK<*+yyP+TCIBEUpoh8COpP|a~f4(%ZP*+jrNYMJIse~UsFZ9 zi76UsF`&8O!=2q^`OA9pTq5=A^`5-1-w(9$XXFi9lHcnNb}{21r0GOX9!FsTv+%dz zq>F^U)h~>6NYE6Ik&f4O&;Epk9|p4Uew8@w6O?b-7UIgRpRIwpuY7gHiIT7!$sk8!Kz0T=|J&uf|yPrGXbPZjnwpQf(^MUGH;d6<*(PPAE!h=d^ zbKW+;ciTcWLugs@Okp^yde;N|{5HUdZg!xw3*8&?!}ZnPwCa5$bw3-2BWb1zefTKB zRXm?r2V3F@Z1blQ4)c1ZbgrPYWSiS8qk0sHi_2HB_ciM4W*)CD3nwpt83R0t10G{+ zu?;iy@I=BF_V?~e4DT1Aavj(0d+;6plxO=3~7S4W?B)9l9{E793?nhT_{*pLR(IbjZS59p&X zk`ZodqfN`eB+r!3iAw|0Mc^Kx$CMwY{72iYcR}}{>2mV1jbv;m9Z<$VoV!D5b%l{Y zo=py{Pq=pW1|y#?6uyv8Mzzkw{;vh`Hqc)9BxZthUmYc{)j;|_U4pZn+92g0>}*yb z`cA$l%(O?M*lPzU4NUqU+*U^j)z?iw*3oiXpHf&2!9N02mdBK)NNM9n+Pg8~(L@~h zCQQ)Zai#s8_jV;strLXB{KvtugnLI7*J`8}n32;g*xh@Rm{ucFoiXW-0Ll!1!ur0w zD|&(QqKfM(FMaH>4I`b4>!z&2);3|Nc$WMzp3S;}e)IduxP5grgIGHr=6C~G&UH0I z#0R^X^3Wdkw>8Vu`5NUb@U+MhezV_#avhAkn<;vmCMaEo%JY#vXOu^Bg{8`;)bc)w zha$QX_k9pEF85)F&g27eH+iirMp}$1{_`%40mnvOc&WZCQqS>`%X{{_zI*$(d^p{q+9{F?9Kw3U+g&7obtp8 zDsQ0cof5UaT*t6`Wf<~0L}8J_AMO^_MP+;N_QyN?ZE3<@y_v5g9^%xWoX%E9wW5q- z5_AbN#)!>t$@`>$%KlVNL$kuK_2`GO+8-R{Syy>h*yYw-l7=PBYbaH|pk|GHEb}g< z&tC~&Ca`{;|0uThIUp!I5`E1&!;?y(^1ny1tm-Ip%B9V?>P3~Ql+`<|vRb4* zf>*N!Fxn#!&T!@J$(y6%ELtC^T&S6?Wre3Ae~8_@s$s)cc2eOMVIMt{xlQo}o6SNg|k>(G{ zTOhq3NylS|wn{I`rt|YJ6L9O z1;&kwYk3* z@w|B-#c{g^T57t1jGxmM%C>~_cWn>`TpXe`T`|sQYR9!&_|NLB@ZmHWX0(O*uW2Pa zogXEgU$o-ZVa>(2eJ>$#)Ecda?ROaW%2c{mZiCyrlhwVzYlcqCso4zcHJk@$@7Uvt zW}I(+9w4`jIW01d{J@*K6mcWZ45E_%f_oJkSz3L8oEdqRPaT(kfk3v+3My z$3>W&87oU$*O!g&55lj=O0%SNKx_$qSFtg6zeMHv3F zIV285$BI4o%Axq!bIsduJs$sQ2nIzP;KtAXa`78YUUoYGk46o`ndkL&R6l&(ot|eN zWg`4X#^SLd<3!&9D}g<_3YnjFL$#$JU|RZPq;tdevs>bstO+bt=L`;)yUN8Li{!PK zInZ`zG}a(LbM2xp6jy34W4bhw==}t(N?f_w;~yAO`6q7nSPx0_oM2h6#kA&!BKUJ4 zr+UK9Rxc~gJ5MmF-rKcKJsy0ZmK^=Jxlpg`|9qwVcc?L1zV9Q0dYluz`ZtjcTla+J z9fM%8SA-1Hu2!5urI?<=G%y?k!}4&|6mxEwR*64oStM;g*WjVmC~9lvDjTE?kkfBi z^L2NNRDWnKIz_MtBwlZ}sw@k)oq)aT4{CIOSajGzYrpLw?C40(rZx%H_1ig^U-arC z@4blQmrEzY^WD?sIg8V5hIxA!_N)^oOm@f4!J+KoVZStKd2Rd8jcNl3P#li@vNRk z{N4P;^5o(&wwm{l#q7Rj;ky@GOmN|UI-aLGJi~U}45RxVWd^0kA>mRAWL{l~?>>Fh z^xh1Bg~#$l^*u@4z%3ruH-8VUt9oHyucsBW)49_CzTwh%IBR zL*LZwy+(CKsyXH#vzAku#=-D9pRhsA9ys>_X^aPBwW@}TXbtxu*RK<%UET$!Cko7L zI0Q~E9xn`6m-<`@GcAZQe2DvMZ-;&#>+^wK`*8i8*4R6C5DfWHLj4^j?wuHo>D}vd z>N#Aq+XH&ohCuBRbn+C^ zEF#rgTGX}VG<(?Ks2-o{_{QfZeNSn5j2Eh2PAU!t{~KSl?_2s)ruiM`U+;zChdQIv z{6Ic;tGDdd+yflzRmO~a^(F0Bcs4NNR%?d|!WFK)dM>EfWbbY^oMwf3Xq*hX(x0FD zHbhpUXHV9wjK{%knKV9Wzz3OG@+$LgMoI36>8f))#3NwxJW8^QSpR zSRA?qYv&J_Rr(I$3h$d0+Hzf$o~UL|to4izL?@l%7*XE4p*lydu!DL+OnbOP zXrB8qe8wM04fKYjwSG{{hV#hB_i?_hEvI$xlB;^Ys^{&=Be3zqLGR5?IP7W7o#x*F zuR})sDgw&82Qg`EmCdZ$oO{X=2r@L2gZRDX47*AzwmE%4mfmgD;tj)D?-lsuz{afFux^T zafog&`?oBV=A{SW`EOkN^cJ?Rx(|A_Opvap-r(u_AMtf9#)&&I{XZKtyT-#hRXdM~ZcX>PQZ>o3Cqt4%PVUm2~#9IQs|6H7BoaJ*AXzS!dz z^Y%H7G^?C23qAi0<_{Csv33XMVD-uWA!!Z#B9@{@rVYFOUuAi5UJdmBnXT|iHriK2 z-*Nc`kW&Mz@3EAl$7Zs=UgMDV#WJVn3DuX^V#;CWgxY9NpC|V2mdXrl-U9WncpI64 z4#r>5xrv_a=hmASt+AB~+uIsk*U{Oq^Zv7_j@O_#)D4F2wU>k23c`&&U~@kTrfd3; z-q?_MPGfGAsp|B@$~NWeDB*N9Cap&O=^EgVKrDkUys8YSLavzb;U|ut0MI|{9QOt98S?wc*K_k zHN){GleBT}U6J$_R?D@Mgcb1Xdrf|6C+CFoFk;{qMz}}!sW1Dm1y~r9%bvCP2_tIU z$1At{@xVW}_zpY47rm}R;nVtow(43mds}hO1T#)NhJ%03Kw38^4T-BdKNeJX-96Kt zxOZ_)nbM*SpKqHc?Ejg{%uh`jovTkL%N>~fJQPTO0pT*Y8s&v_77%so4APtdX)M`$ zkAdn#rh0hPi9hh)TYs3IRE#+b(xK;_HZAF1!r}5+^geopG}UOx(=$fW63{tVPo6vX zo4v@R&j$Uf6L*%fr(uJUW`p?hfv7Tng&YuKA!+`p2i9@oJ!z~>!_as3T=gx@K5m*a z40Kg4XzkxMg3msgNIb^v%(67nvRc5t?$~wKl!Bc8A9VUV*TN*{HJtbsNMp*``+KsOT~^p5 zw+c-5cG6AVrpJ^{pxK3{XLsuiZ+q}YUx|MmGZ9V^ujsqKiALqRI zj=|H;t20wrh;HH*tn5}9$#)UQ0oJVE7bj#i!ftOzBfW=_Zp5x|9GhG_fd=&4Jn3Rk z+BnX)Ca+P(wBoYrP}raVYm^B=eho>p!GKgx+KVM;E)Mu#^CVOnPiZjXXVx*h8>*Ul z?On(xOLzHdNCdF$Tg2~bPT2K#DBN9lUZ;HB(;iL1@%KaMn0<~ZTrT_53sPI9;*fMp zzG;;)SM#I16mg)Ydg`)U3-VJA!cEUv?yK#NrLA*yG}mBW#}ikUIV#=7{eL=E=tkma z#ovT~Hr)KyI4t>;1B7=na@aMe(&#fzv&pb6gG(|AIm0H@1k`!Lvbm&=uMt zc~0@`dkUV=|A70e)*>BspY^`vEQxoNCRRA0G#!xlR=g&+q|PG#dxFL%!|<_FDv+L4 znhM`f4U$#6_=~i-C(NkdMmPnVAbV^g68DPjy)uQuJ(q6tS(oGI1@Waed~+lOI%xPR z@E4n@-=u;k$?FLidIlHhmChNLGk(qE)ATFb(5KY+9hEG^wc^Y&9Cjf2TW z19W2+Y=vLFMkzkVv%Zl)epg+CeA5ToLpP@8mHZnV=x_|F|Ea%Lv+Tcl&?;F#<>hH` z`@dMY6!e*?Yi~$&5nCLV;2~RcNqd8mPfnaPA%;DT1oC~<-)&?si?JA8wH#e19)O>NZEr9OA-!4tO z(|z%`0?IGg>4^-~J}H;t56ks=J08Obi&@?j#t&yY_~ z>OpumRt#SkM7WoL#E&8;O+&&Iq}&K^UC#E&chN_}4l%cWQ~pnDjc?ms!M^jF5|5u0 zRlA%*>NRHBG6O|adkibQN1Apj{QDBD8)bS1JTg;pbioTaoD!;Z98f<&{!kBScRP#C z8+->&?}`AzN0n>g6^FVo6hNM)(*c0^q|DDmGyajnbK^X~YkmSf(>XGlC;K)Aw{ zXHZ(PWU4iirU#`{i`JxxGf$cc!WY)_&Jj?2Pc@U3m)k1;iv2$3iaT`%K>ZHpq(3eB zjMp33<&9gBd>Go_9l_M>(R>TiATaG=V<2r;AcrU5=`Qv>u_PAgJW=6`@(iTy8O@I% z-33io8w%3ZlpB0U(q)WtOY%8F5HG>4;6oK&kGK!Eq{?#!uZVypos)n#M7&OFrnppQ*Y&U9+3zzODee`GR&7M}ntbaq z%1~Fh@Lx z1$M0HLV9B}Q+TVij?<@ecy`2Q{Czf%QKndt5h~vmzx|bHeP#m0&nXlJ!`pDmAsJ~> zq`p>OiIKkp%G3Cw!-+sV!TuVVbHZMx_H&?vAN;ek=FVqo;nzh+h04Dv4~6|z*A#dR zx`;L*nK+c5|Nj%ShMCUI6@(9x{F|^WaA696NlWuF50c^bGk1j%aPs&btUTq06^hb5<3Rv3M^51U`)$;YM+VFld)J$F@>aR<_Q^kYV! z-}gt$^?E%2L7?25REt+xw+Xj?@|UaihvG@EnIf!zeQB^g3@5fX=2l60@b}nXn0_e> zH-A3>_tMPy)QBbgM$B`~xvMwy+%+8at{%dNd*<-B8)jjR;SJ^Ju2J~*?l9={(Gn-0 z>!7pQH$VnX{RBFkRhZ>2mnMTB!P5Jmwd=nGT=wkF{xkCx9ey<6 z9eX`PItz?n*bI(_S;3Ev4shE?A1+R-4-u|mIJ$W$bXrudTa`i29E@rPw+_!`xv7oB z^6@ z(0augOxqEOw~Q;vXN5LE|H+)Z2%s9^j|rs^SFIZ}x|Kw|Iz?7pbp)jMC(&kej6Byl z9PSrKL6gokv_PpZZ4;|wdHP2>&-<+Vh|1`d^iB9q$dXegx#Gh2oSyO5!KJ%q{9-E~ znVANd=k%P-3Yf;tCfDX?H#SGA2VOXP6(Zi&Mq3BY7VRx#hZ4g?POv@naB-sV)@}pu zI}zyh$QXK^G>|89{dxNrZ{a|V@n{&lUCtfW20Qkepv$%p+>5-{^>0$9gh*ZRyw2($Nr)>?i3W5O6*YW)DI zf8j}Ttf2FPg6dbH?rl%srm5j zK_0|z91GIA39os=3EKSXhTm@=#y&lcfqtd^K(m1F7ZkFO58H6M4!bn56*QaUB$t+_ zQZCjQOwPBIrZXad&JUX2kW?oVd3D$woH=bd_9W(^hW0vYP4` zg!5zkv1Eb;UR`qn+O#|bNgMjecdxB@R;BtNqxB`$$$(C8yI}f*Fxb)eGP~PiwS4qr z4a)mP_$+ca+`eE37JVN=kXLVr+TB)nao|#MtnwA{b(Sw%XcjJ>zep1Pu%3?1h1Zt#Soh^FM(@$)JB(B{ z;Da6F<&PyMxFGzN$o6)GF8z)}K&PwdSjU}Rmh<2>qkku3+c-Se*$_>VyFrWV?lL3W zU9R{XA$+PB;GMwRI`OhDc(%&}@0hP@u95IeUakLLTyN>h2el2*HtOE7dtDb}^kV@t z&Nh?XJC|wN`)Sbr#Z=rC(SyEE70s6XjD)97^lui7`Hs|2jCw(L_qZ>1S~Q2&+(Z^_ zJ0MS5>=b%w-KE>)$uf6jTY0XUEf{z=W##+Ug4I9`i)Nf99TUzP?AeP&_NH=S%1TZg zASRreNOf(`Y46fLycPWG5R37bK0-`SXC6H0Jti-3fHTt%v+q;-@LQt~R-9ko*Umr~ z%Zkp2z|X5)VCSPn_;hd%&eJ!AyLH~NwrS}i!s0%rUN|qPf1u|edOqozDbkv-2jvGt z`j|@ND;rU3IGFP(Re0r0E1nJ!4u2>4#T z9^bv}0fyNZK!;;Gr0Zh!waH-JeT2FXi|gD{686h8Tb=l(zx8<9(D|_Br!ntd-~&$% zUecNw6oa$nMF^gr0<}C%)ShLf&(9Txf;RdD{%3X+(`Kz#y}%XL4>vDjhIuui`bYX+ z!O$>iVfzK9HyDL^$&6nyvB8O-=HZA+{rRCp19>B8Bahl$2zLSp$@U4p(w1Zx^)#>b zbv@h~-I6eDDE?1JXB}775=CJJ0|6BjFc2F>2^-V&` z*#jmfc41+6ck|Zp$NTx7@7~KfGkdS~t$oiql)m{`g}dVV+0mi!yt}3zvi#~4S;a2T zU^?THTQ*o$gg&-%q%<2%d#85QCEZxq{j1Ea$}S`v1I2zXDW5&;LOW8DgpyQ z7yLM}P;F@wu-AARJ)YRd()iO>az5OSzU*@o&^TF3Wzdeua5^$6n%WdEVezPUPodwX z$q6*FEd3>@wJsdH#XHHTD`M?2l!jyHCpt*D@zySUf0uAH}9n-A*C3{jEQg^ zYLagv*_9k4;)~bOV-{Cqt3z+}&3V?t@AB&+HTZG4ZI)ecZVKOgUeLf4UbP9^;y8m+ zgS|NF{9)=-Q&W8LqoR0xT|NB;Q}*)>2%14|71nChi!{C0fAkCsbe6(ho6wDQQ3?!X z_K&f)0vf>|TP>iRKCP9b=VdB-EUyk*@K1&6>9XbRf%1p*W*%m>(e$>DaTDGYLj~c?dg@9 z9c>p_m*%ZW`(g*_{;N-kPR^BOic^ZJv9h&({opKjJ~WMdnq_07qfZxXpn}C)S}Kb} z1U{mN&2Q4E3-c9+&LXfuAbz%d{h%?ud{fTwftptL4cK6rhBrynvHG)om=4+)Ax2$r zfPc^6XFqHR948#R3X1ugkDR9thR>Z9u|>l8XpUZ3)FrCN!1Gk>y*uANn#^OXhpS>< zvnitJcGZ5LXI70D`rs#_+qnL(oWxqOVXfJqmCr?!g}UEtytr!lhYz zx>&AN$90%xypxh=ET<{HGjXj?1pF&TUEEv(+F4YhH)&=14*ojMM~w4xleSfBXYrol z$&9x385*y*b-0{WyA>vGrWVQI@eY;9=x1`HUCQu-_fR|o<=SrQo8MmJd{ya$;(A%` zMykZ_>vYFAn(R~Bt6g1v3@1qF52G#(=hEVWo(e3+b7wp1fWB2USc`&uaa~gQY62%w zx82(;I9Dp)Fq&35mX(xkI|p7Zq@g9eCf6Zrw(bwvdS$%PKW)k7BKuaksLD(_$j2T< z66#i6Z#fx$HI0m(rmlWTzjM}Szw46CvF+EG_xru%H%+VP-eX*4K;M1*%b}VoKj4)x z`+fXw9Jr^de0hMt(9_kZ#4v8^xd|ELvToLX1MWJ`BjHzOpXK5j1IYaU$hFKLiaT*; zR1(MG`HgQ6ueQ!9>B6ze{uHtEvIUQK)7V?X71Hbr&Vh`+fTPVi)1b;s^}BgcwMrcc zy}{3$oiJJ>lbl;?+kqA3`i;B!C>#TvPeUqRAP4VS^nQFTvfZ|Vr+g{Qqo$Z^u1<^2 zlil=qubpbqjv)D@=%8hAJ5;Tt%kS6y?5t1GH78|R(av+Y=vG@zuxE6sjit~ z)3%(3+4xEDZ)Nu{o~@vnl`osPk}hf|~gGg^J_#0cG@5Ix)Z z86I?BooKkqidy1B9(&RIOK4?*0@7&nznf(w{L^r;I@hbC99s5)s*op5VZCWuM`s0o z(_m7%-|+x@?~Eg#JKh?ZORM;U1RbQIVwPEV{pIQ%c6<{i4C8_~Io#^5THbz5J?pMD zYm$}ChSyjvJ}2ftU1#l)^7%VV>^tr(pcmr$+`bZ@VR{U;UtSvSS~Ar^AsfI;%JJ*@ zdGzwwwPO9zp=5e7%|@c%w+rMBo*c1X8P4&T+<^VF2TGUNiK6RRFInqLBg4}8`6*V^ zx;{FlCL!+#-z`T~RL-qrm~+jF9x7;~oovt{j@#8w;i=_av$z93qM*Uz(zW#p8AXA6 z6>_3B?`fNn%HN(8&|ppW`TK#u)XFBat`xXu2k$eP!uYDbbj@AEn?wliF$Q1As<#VU zH%Aqs;LHNDcKHx-ym`3jIcX-X9ygClgt|~tV@YX}R7TFjHCTsgeWoFHd5|~5EVov~Th0E#FB#0P;J32WjM8As zZ)EZ&_)AoHK8FjQS%f`a$~_w8X45}qU#zQv*ITi68hok2eQYv!$CeSCtJ?)-j+3JCDvFPKR%e40MBk~y?A>r z?zbo-Q9E3%(rs|gH!A%uLd~97S0O)V9g9;hH)?p&)%d+ao={;6rZ94^zP@RJhqeRgvBeJJ~fHr7PEH42kO#-I=Byc>sErE6Xp?&ylYsWD?HClHow5>{RiStJwEua6C*wGOSSGh<$S>RH zO#;&?_%<(mwu+%)3hPH>PDc_lus~1qtcIsV+j+AH$79VNRE&aCck-qk?HOCSbX}hwt6D+C+CqHMyTYq-DS`Gg`p7_)P zUShDiVNE&sx+5KWQrBvj1YB=1eQ0?4=+0esPU z^2Sm2+i=x7vS+l(d1~6E8}unWf{+2^h_Yv#9}@ahK_k?-uQ7qBBZ?~?QM>h&E1m9z_|NH!aE%)4Cw>9L6qtxG)1%wJywspAPK-<`v2S|OgPrF6 zs$BEsjAMs>%>my;ar&uPlaJWU0|)x=rsxID7(bh#rG_u4`KZqdb2&!cArr?NO~CWT zwwSpIUp<{7HU?~DzGrlQ)c!D_7ol2tE z?IGlQUVPgkp$qVUgUY>73pPIOacCd@TqZo~b+H;g{C+Bv-Df5!%;sqCDqHRllSn4p zv@Kjk+MNs`^oW4xvdM#B7k#_f5afr2yvy?#YX7N%#|khA*Ynv7{)?d@y#lz~<3-#m zq&2-fV4jV3j5gn#ZhB4Oco?3g@}4av%`B>Sjyjk@CNO5TY+2{a_m@X9a!J*~|1(J{mf&F@IppcInlq-o_6(AU&sCv?EzO>V{_ud`-*B!N^~h zZ!umHf5%1xO;$+_-bp5tgX58VgDvQB#;i!)oe?I3!ivkk+jg+YAx4urIma`+NFBko zN#GKCGs|6{>N=Iw*s~VQvj}?4n6c8F=ex3Ur^y+3uS(bl&AR2!`&vmeXBC$B?Gyps{sE}pJ6)~EG!Nt){x7d+6u?jo&ualPRaH4?1R31)w05@ zo0Or?dEFpCcup0CyaC2YpzT$n#IV<2c}2!by0Wq^?rH2TJ{I1h9(BuSUMJ1Dw@7v| zyeLfWGcz^l9)X=D{+^6Rhkwo~)3dEs185Cj>D)oOJblWS!WRq2=I(OJ+=X({Pha`C z%{@B(p$-48Ql7&%=GW7AtQH~lk1D^n0kXTFqmHlQCPPxDQm%ltBxpg67CrJ+OXLGrxu|y`KMhH zM>Kw@^sudg6)*1P(&96|TOLD)eR9h(zgqCD{&7;Z;q5{1=WPgueYPa2dKKZmY$6k0#UB)I-UCR-g z*5U)taSqWmZh$V-b|Ia&R-uGaxzwfC_2^)WP24)XxqLIKyTG|vF7B@&;<{C)GSi%? z?!hVQ)>R+g@i&iNxnV3F`TB?)Pp9*iyzA7o7Q?8u{SsRG5zpfO?nckE|IDJ9SXBFX=u@F!$K+I(iXMOv#P%Ae3KT>{%YCiiq($)!q4ri z^3&wJ{3zAcy69P1wq4OoJ;pPX8@*hh7ds}B-`WL?HIgo!U3Kg53;fixF71r$Xx2$) z%$>?(yW6l|-|sZV_B>B~T1RZK-z+}Od!$S)G|s<~#>bYUA_uITy=0gI`W>qD)gB!}-`qfrAicS{8Ju(mGm z{g~^Qh|AiCkoITAnGNMdX1;tn(qp&6=kT(48$EMDE+etIE*D7sb$Oe>mmhCE7CQ7UwLGo93Nf#}7Bnq-HDbQ-S-P=yuiHRQj5x zsP~Jh+t5n7>e-Eqzq4HQc_9{BTB!xNXAFBM7ni^_1Qp|HS;aa!_pSbN-GaUZTxxx3=^TB8 z3-w*aHZOi!rn+4xo7}nd@|PYOy~?~VMBcf-M^tw^K?7}G^4Vj7*2KXT^|+DqMWvMD z8b3>%?G#P}MsL>7?e`PbfyT9+3Juu7%jH@18TZJQE!k6=W1YCvRo8vCTvhacLeIlL zkhM=~S-TRvtJ)-9)$p8}c6}d*+@H^1{r1w&AM<&5;eq%L#YIQG&rjw&vu(I8O#e(C z6K>5{&0elaUnpvy&ZFP18=*e7YM{Mt`pa4g+4QT$!}-$iT>M|cLq2`4u}+UJpc}Oe z#vJhsp*LmStM-(%y*@uFV3F8MdB*J-wJ5MAsZzlv6za83^_&n7eaK|9)}6eUbKW#dky1U!TM!n9Ndf^&$gEMx{7so~x!}q3z^@Nl- zS|6bJ=MM)SF>D#(eATg24P}e*X?)|I=DEW@i{#)uG8E56&FpEB*e}k0>^WUty+&u0 zZmev53(KRM{rSzW&6wN%Rx0O64(ifDERN2u+plZMGU^Zdr8XCg#Pfu&W$@NLulUs2 zI{MNr#Y^sQ;PhpA)IE8JzWVv$wUfv*~;Y!l3T0!n111587=t2fp1@itHR+o3Y*&oTt$nz2sa$4c(;A7pL*zY1P#FCMS4hjbMG$lAX=5ef+!5 z_#c~k+%usLH{3SFvUK1RrRGXjD@^s6PNok z^q57(r0=P6+Gn$itWlwv^4YwC-hC<}gC4I_(NizUyd|?s?}@mlZe2Fi@OpK3%0Kd( zZ7ZK956T%8=aLO6NPfd96qZ@06nDoev@ z-Gb)=`lYW2zv80@Q&h~(JM=EYNq+BphT!cYxzaSzwULMQk#7@;n$c!G22a}}-r~Ej z&gXjZ;=}f|Ap0RY)oX!rec74kB`j5z{kip)Q5W^qNxAiZ2|f7xge&;WB@BJ#>vyWl z47Eb^2hlls+sK+ba#5}(rDaRc&8p?b3|#-*h6Xj5Lw`QlQl*qAb-q+zg&v^YFaOce zX7ebbaSz$$<5w!yE|)HbYmBVpVpNP@B;EOVh$2__6SqsW(l0OziR%{61Mc2X(_UCe z+$sVOFUggk#`Egw&%~P4aQ0j4sCUl0%(tdy)2LIc(JQctx_xdN!xxFW6a`a^SAQ;l zH-1iGF~`;XJ7x6ce`!=?Qx&6=6s0F<)MeJXdp}AP-_&Tb-GTct)}P>#5o@huPu0{n zk8Cw{t5K`;Ca*U%?=|iEI-Sq`Zp&DIiS?1&55MNA748fFo-f!vv#Z=1l3)qI?>{J( ztax8?7qRqXG@l&kg70e@X!txsE1?_nX~c;jUex`u*zNAZs42oasTtKXMEh#)4Bgex zPw4P4*|xt{aSl23-T3AF`r2aZI5kC?{wjaMmj-!Fd$2rw{uVQO9zGcoCdLhe2xpPSUI@%%6Wb{Z+0dC-q&qmSS1Gw%x zV+oyVIa9>8jiWU?e$cVomuXV*E;8@C*X%XZUQVshRXKFm zv(9jy=oD0pO|95WKVb>ab&@{5TAJneZab4WN7@T3UdKKA?m)e75rZ1!mW3Z)!xLJ) z^tM5#=tsHjmeg%=Ds@{W4*FI`yEMWz*XZ0hFy*~rl$et>32*YbF5aQ4PE1}fSWq% zY13zM`)cbIG}YRsZVW%Y?511WJE0EpWoe5!=HqMkf@w$d{a%A9_{enH-s+GFos@|b?to^?C75T5;ru31M`DOnMYuiho z#Ap9QD(@&?ws*~@4MSsXXj%LWzLC8FFW8euSD*ZoJLb%;Zy$Ci!zo}%0Z*jp!t3ee zlt@k+pO4RM*kMU)FdpmjRN&mj?ir1E^4WM~jcA&XQi)rg{75U+OomS}Y7^%*!04D7 z6`CE4@|GLDbd*EShv-5V&Y@3=sCOM&(A94z!KHC3{~%8d-Qg0IN2p#4lT@_#Hf21+ zZ}v=Obhc*1FSe_^)Pgl*=$nGa(N2F)VVLbu^~F5;Vl8>6{Tpz9)4*S|uF~YQm92(T zk?qL2)e7W<<7_nM)zMrU*@DokDkXFoUC4EVs{BZ@Zu?|MQ{6%>otB*@WGhR%JD=(L zX>Z-#eI~>6WU9l{EZsX37q7bdui%xXljyJ8DjE^=SB2GEMEjo%6%U6D(M1ke1o9;p zDLRb7p0w~t0h4do^ic1ntGJnO1$pm=r$&G11$*X#v8#eh9W1F1xQ}V}R|U`Krk##c zl@a4~nQ2SKiP)MVp!x;8zoY4+B>X(#*=L%(F(*PLydTChtR)GxWLTMxb;t9CF%y7S zXk;FN8l}j})wo^WJd_^2jQX}KWX_enhGi!(FkAM|$;wh8DVZuccrKw2&F7N+;{n3x zZudXm7+$6(-wTwe8wPXA*uIC!^Lzr?3=w>==PqILgy9Y764+9h+EFDOvxUKZ>f2j;h4Z4fB@fcd8pjytqwBm|Pxx%v5!ccf9bfy#O&3cRysusfna^bk z8y>AP;)g=^)~G*jKj##|BU#Ukg@>%bnxreEABJi1k6BC6=gl*PbKs(>bq}MDWXOF7 zE3zi#9$uYxbxtJ1=GPKR^Nh3#x@hWg0v~YabC3B%dk2#vv$82(D-B~J=ZQTNj$*Ap znA}2tZ1XA{n-zM+ZaV|zq~iB1$*U5w{I*4b!n$ehLVV}GuNC!0w!Ui!`Wono=PaRD zEMV9`)I1Ma9%H;vR!v+*r3Xi3_0Z<#IxBw#bx9|O#mV&WOB@x943I_Bn{q&doBkWV z7t+3`wi9LnrjGea&6ipI7PGQ7{3nM*e;ZC$UCPX7Xfk6qBEdvPFBuu%(r4B_hW83E zhDM$hmgT=O-x$fYgBvoqNHmGVJ=#5AknysQy<1}aHySn&&{9jacF)Dy%n}-FZGq1* zGP^SUQhV25^O~$RgJB4#A7Xi(LW;d13nuPMtr#AS#1IzIHV45iOF^;z#Saz)nUNneLUTI|dkAICg zi(KeH$mGDeyOH*sz0-a${CbP=HmsS#oQnG` zsmc?7G-fcUsUf4W@>lUFem}Jb)y&6q_E?hG{%V>zPu~1|4q>*SiWL38WBuL=%%6ng z=?-|NwUhp1i)ZPco(rvZ*T|eKTHg|zykd~AM)AZo8ZHH40C8btc(PoF*K5q>C~!D zZK*>@Spr8%cn%MK-j0TzPZd}{^hOdRlTnYh#bvd2eFZdB-!5@k;JxZ^g%Z-})9`&Y z2#lkl3tZw*wJg0v{-PXV2L!ZV8qWc%G4$JLD7nNR<$djTGPIK3b+}02L-}yXI|7ed z9D9^yqleHelf&4|zM$Qr%<^>vtv&qQLs_l|50Evji;bocIE{;J+Glv3O;195cy-Y# zgn1gb$jpbC+$(r@b~bzObI^l(mCK=WwAqcko35UH_fnV}aQBr*35-R^M`m86Hf?#C zHSd|dIyaqsdd2KtmQREIB-UG*bI*UZBjye-;T>)YJ|v-;GKCw!L-MK4z4wye+rz@l z_O9i*Xl;07rYO`lFIYdX)USF_SAQ4zWc+)&@cF#Je2DP#6mqr6@e-OYC)XRQz+w_y zrtAGIX66r;CowtXfhEUPTAEdW$!Se`UTN|#d^D>!-cNm~kV!BTv*3EMU*vN9ESXuE z$r0vwFatBX3T``Wavjzpl987fET=F>5}Chq$bB)_tcD*9hk><>9uag~WumVfb>*mc9UyGAGeysuRd1 zRq*@Cazt5?uNXYTSU1M(H~@1L+BvTkGRYV8QK-Z`M6~*zOXI)Q!87;yXSbsku!;E7 zE>M`$E*+kNCUf8onf0NA-SgV4Mp> zos9i5^FT3ZFN+3KIvc*WnmmH6L-~(fX8TDAV5SMy@dfV+%%i1q(0uesFT+1Z z-^HcY69|09!{%>Qm~{~Nnvsjy%&|;Pt?>6WLl32U=6%CwY-$GCMU2mO6?4FmVo*mX z8e`Xq&zE{GwuaP@W*%6~b2vHRxi#QPiDOC17gA6o2N7~6&#HxI+h9&L^Rpr_4wZSH VYRQq-kIc-+Qae!Uy`T2}`2O9U?yEmZnObteq@p~} zvPwy_n#4ITO|D^rXa-rXS51BJg?iF0KU*I*zc-)$+`Me+w`nZ7!PQayfKP*Y1c`6? z$uzcH+xM)9CyoV)H@D}p7lbfv>W_y`(GB{mGEkzJ@6*l;rJbK$ z88_l~y`O<$F*B^a#(M=@0tTTBT56vExx2H&Q;BO@8VxL?j2n3dG*`5&qLOP_7@4bM z@~FizY|>gHM$6?AG{odwfFg|W3UiTxCbA^G36i(?(8%arMEk4Hqt$K+c9q|h>B zdPPoQMUq?-BT(P4D1{N;3Ek0+WWPm4k|!tzTaTm8T`OwJ-l&lol!j$Rf4Nq8BSUh| zD~KWWI|X`q>^_rB*09ifxal)6n^>5 z(-VSemTF1bKY(;w2rJ3B69-zA zpv&K*q>v$!QC>7subHVy5FQP-w_xm2LMK=1-)n?Vc%Bp@)rr zDidgF&;ygE-fMj|;+?8Kq_kMY4B5pg{9rW0YB{Oz4bVhF8ke*LgGawMN8$rD4$vnY|W}$B}d+u9q0C5o79z?Fwh*9?x9Etbw6+ol+ z#lg!)96tZVJ z80-48fV1h$3cYx4xn9Vv$l;FV`#!flcN#cRa)q&E&Da-L7>fm~@!k4|ob9yVu=f9M z7uR)yS6=wn`kY@&ea@4CAmlh3gW Date: Mon, 7 Apr 2025 09:55:31 +0000 Subject: [PATCH 3/5] Remove debug print statement --- llm_utils/graph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llm_utils/graph.py b/llm_utils/graph.py index c65e75f..9b6e503 100644 --- a/llm_utils/graph.py +++ b/llm_utils/graph.py @@ -103,7 +103,6 @@ def query_redefined_again_node(state: QueryMakerState): } ) state["refined_input_again"] = res - print(state["refined_input_again"]) return state From 335fd9d81c8df6dfd85ae3c6dea02feb2119a89b Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Mon, 14 Apr 2025 07:15:27 +0900 Subject: [PATCH 4/5] Refactor query handling in llm_utils - Add Reranker - Added new dependencies: langchain-huggingface==0.1.2 and transformers==4.51.2 to requirements.txt. - Removed the QueryRefinedAgainChain and its associated logic from chains.py and graph.py to streamline the query refinement process. --- llm_utils/chains.py | 68 +++++------------------------------------ llm_utils/graph.py | 73 ++++++++++++++++++++++++++------------------- requirements.txt | 2 ++ 3 files changed, 52 insertions(+), 91 deletions(-) diff --git a/llm_utils/chains.py b/llm_utils/chains.py index 271dc29..9806721 100644 --- a/llm_utils/chains.py +++ b/llm_utils/chains.py @@ -40,23 +40,28 @@ def create_query_refiner_chain(llm): 예시: 사용자가 "유저 이탈 원인이 궁금해요"라고 했다면, 재질문 형식이 아니라 - "최근 1개월 간의 접속·결제 로그를 기준으로, + "접속·결제 로그를 기준으로, 주로 어떤 사용자가 어떤 과정을 거쳐 이탈하는지를 분석해야 한다"처럼 분석 방향이 명확해진 질문 한 문장(또는 한 문단)으로 정리해 주세요. 최종 출력 형식 예시: ------------------------------ 구체화된 질문: - "최근 1개월 동안 고액 결제 경험이 있는 유저가 + "고액 결제 경험이 있는 유저가 행동 로그에서 이탈 전 어떤 패턴을 보였는지 분석" 가정한 조건: - - 최근 1개월치 행동 로그와 결제 로그 중심 + - 행동 로그와 결제 로그 중심 - 고액 결제자(월 결제액 10만 원 이상) 그룹 대상으로 한정 ------------------------------ """, ), MessagesPlaceholder(variable_name="user_input"), + ( + "system", + "다음은 사용자의 실제 사용 가능한 테이블 및 컬럼 정보입니다:", + ), + MessagesPlaceholder(variable_name="searched_tables"), ( "system", """ @@ -72,61 +77,6 @@ def create_query_refiner_chain(llm): return tool_choice_prompt | llm -# QueryRefinedAgainChain -def create_query_redefined_again_chain(llm): - query_redefined_again_prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - """ - 당신은 데이터 분석 전문가(데이터 분석가 페르소나)입니다. - 사용자의 질문과 이미 구체화된 질문을 바탕으로, 실제 사용 가능한 테이블과 컬럼 정보를 검토하여 - 더욱 정교하게 질문을 재정의해 주세요. - - 주의사항: - - 이전에 구체화된 질문을 기반으로 하되, 실제 DB 환경에서 사용 가능한 테이블/컬럼을 고려해 현실적인 분석 방향을 제시하세요. - - 불필요한 재질문 없이, 주어진 데이터로 최대한 분석 가능한 형태로 질문을 구체화하세요. - - 테이블 구조에 맞게 분석 질문을 조정하고, 필요한 가정을 추가하세요. - - 최종 출력 형식은 반드시 아래와 같아야 합니다. - - 최종 형태 예시: - - <최종 구체화된 질문> - ``` - 최근 30일간 결제 금액이 10만원 이상인 사용자들의 서비스 이용 패턴과 이탈율을 분석하여, - 어떤 활동 패턴을 보이는 고액 결제자가 이탈하는지 파악 - ``` - - <분석 접근 방향> - ``` - 1. subscription_activities와 contract_activities 테이블을 조인하여 고액 결제자 식별 - 2. 해당 사용자들의 activity_type 분포 확인 - 3. 이탈 사용자(30일 이상 미접속)와 활성 사용자의 행동 패턴 비교 분석 - 4. 주요 이탈 지점 식별 - ``` - """, - ), - ( - "system", - "아래는 사용자의 원래 질문 및 1차 구체화된 질문입니다:", - ), - MessagesPlaceholder(variable_name="user_input"), - MessagesPlaceholder(variable_name="refined_input"), - ( - "system", - "다음은 사용자의 DB 환경정보와 실제 사용 가능한 테이블 및 컬럼 정보입니다:", - ), - MessagesPlaceholder(variable_name="user_database_env"), - MessagesPlaceholder(variable_name="searched_tables"), - ( - "system", - "위 정보를 바탕으로 DB 구조에 맞게 더욱 구체화된 최종 질문과 분석 접근 방향을 최종 형태 예시와 같은 형식으로 작성해주세요.", - ), - ] - ) - return query_redefined_again_prompt | llm - - # QueryMakerChain def create_query_maker_chain(llm): query_maker_prompt = ChatPromptTemplate.from_messages( @@ -165,7 +115,6 @@ def create_query_maker_chain(llm): ), MessagesPlaceholder(variable_name="user_input"), MessagesPlaceholder(variable_name="refined_input"), - MessagesPlaceholder(variable_name="refined_input_again"), ( "system", "다음은 사용자의 db 환경정보와 사용 가능한 테이블 및 컬럼 정보입니다:", @@ -182,5 +131,4 @@ def create_query_maker_chain(llm): query_refiner_chain = create_query_refiner_chain(llm) -query_redefined_again_chain = create_query_redefined_again_chain(llm) query_maker_chain = create_query_maker_chain(llm) diff --git a/llm_utils/graph.py b/llm_utils/graph.py index 9b6e503..37e1d1a 100644 --- a/llm_utils/graph.py +++ b/llm_utils/graph.py @@ -10,7 +10,6 @@ from llm_utils.chains import ( query_refiner_chain, - query_redefined_again_chain, query_maker_chain, ) @@ -18,7 +17,6 @@ # 노드 식별자 정의 QUERY_REFINER = "query_refiner" -QUERY_REFINED_AGAIN = "query_redefined_again" GET_TABLE_INFO = "get_table_info" TOOL = "tool" TABLE_FILTER = "table_filter" @@ -32,7 +30,6 @@ class QueryMakerState(TypedDict): searched_tables: dict[str, dict[str, str]] best_practice_query: str refined_input: str - refined_input_again: str generated_query: str @@ -43,6 +40,7 @@ def query_refiner_node(state: QueryMakerState): "user_input": [state["messages"][0].content], "user_database_env": [state["user_database_env"]], "best_practice_query": [state["best_practice_query"]], + "searched_tables": [json.dumps(state["searched_tables"])], } ) state["messages"].append(res) @@ -66,9 +64,42 @@ def get_table_info_node(state: QueryMakerState): db = FAISS.from_documents(documents, embeddings) db.save_local(os.getcwd() + "/table_info_db") print("table_info_db not found") - doc_res = db.similarity_search(state["messages"][-1].content) - documents_dict = {} + retriever = db.as_retriever(search_kwargs={"k": 10}) + + from langchain.retrievers import ContextualCompressionRetriever + from langchain.retrievers.document_compressors import CrossEncoderReranker + from langchain_community.cross_encoders import HuggingFaceCrossEncoder + from transformers import AutoModelForSequenceClassification, AutoTokenizer + + # Reranking 적용 여부 설정 + use_rerank = True # 필요에 따라 True 또는 False로 설정 + + if use_rerank: + local_model_path = os.path.join(os.getcwd(), "ko_reranker_local") + + # 로컬에 저장된 모델이 있으면 불러오고, 없으면 다운로드 후 저장 + if os.path.exists(local_model_path) and os.path.isdir(local_model_path): + print("🔄 ko-reranker 모델 로컬에서 로드 중...") + else: + print("⬇️ ko-reranker 모델 다운로드 및 저장 중...") + model = AutoModelForSequenceClassification.from_pretrained( + "Dongjin-kr/ko-reranker" + ) + tokenizer = AutoTokenizer.from_pretrained("Dongjin-kr/ko-reranker") + model.save_pretrained(local_model_path) + tokenizer.save_pretrained(local_model_path) + model = HuggingFaceCrossEncoder(model_name=local_model_path) + compressor = CrossEncoderReranker(model=model, top_n=3) + retriever = db.as_retriever(search_kwargs={"k": 10}) + compression_retriever = ContextualCompressionRetriever( + base_compressor=compressor, base_retriever=retriever + ) + + doc_res = compression_retriever.invoke(state["messages"][0].content) + else: # Reranking 미적용 + doc_res = db.similarity_search(state["messages"][0].content, k=10) + documents_dict = {} for doc in doc_res: lines = doc.page_content.split("\n") @@ -93,19 +124,6 @@ def get_table_info_node(state: QueryMakerState): return state -def query_redefined_again_node(state: QueryMakerState): - res = query_redefined_again_chain.invoke( - input={ - "user_input": [state["messages"][0].content], - "refined_input": [state["refined_input"]], - "user_database_env": [state["user_database_env"]], - "searched_tables": [json.dumps(state["searched_tables"])], - } - ) - state["refined_input_again"] = res - return state - - # 노드 함수: QUERY_MAKER 노드 def query_maker_node(state: QueryMakerState): res = query_maker_chain.invoke( @@ -137,9 +155,7 @@ def query_maker_node_with_db_guide(state: QueryMakerState): res = chain.invoke( input={ "input": "\n\n---\n\n".join( - [state["messages"][0].content] - # + [state["refined_input"].content] - + [state["refined_input_again"].content] + [state["messages"][0].content] + [state["refined_input"].content] ), "table_info": [json.dumps(state["searched_tables"])], "top_k": 10, @@ -152,21 +168,16 @@ def query_maker_node_with_db_guide(state: QueryMakerState): # StateGraph 생성 및 구성 builder = StateGraph(QueryMakerState) -builder.set_entry_point(QUERY_REFINER) +builder.set_entry_point(GET_TABLE_INFO) # 노드 추가 -builder.add_node(QUERY_REFINER, query_refiner_node) builder.add_node(GET_TABLE_INFO, get_table_info_node) -# builder.add_node(QUERY_MAKER, query_maker_node) # query_maker_node_with_db_guide -builder.add_node( - QUERY_MAKER, query_maker_node_with_db_guide -) # query_maker_node_with_db_guide -builder.add_node(QUERY_REFINED_AGAIN, query_redefined_again_node) +builder.add_node(QUERY_REFINER, query_refiner_node) +builder.add_node(QUERY_MAKER, query_maker_node_with_db_guide) # 기본 엣지 설정 -builder.add_edge(QUERY_REFINER, GET_TABLE_INFO) -builder.add_edge(GET_TABLE_INFO, QUERY_REFINED_AGAIN) -builder.add_edge(QUERY_REFINED_AGAIN, QUERY_MAKER) +builder.add_edge(GET_TABLE_INFO, QUERY_REFINER) +builder.add_edge(QUERY_REFINER, QUERY_MAKER) # QUERY_MAKER 노드 후 종료 builder.add_edge(QUERY_MAKER, END) diff --git a/requirements.txt b/requirements.txt index 2c506a8..2998e38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ pre_commit==4.1.0 setuptools wheel twine +langchain-huggingface==0.1.2 +transformers==4.51.2 \ No newline at end of file From 5244fda33ce642a524f6d0a5e26b18ebba5ea530 Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Fri, 18 Apr 2025 08:49:24 +0000 Subject: [PATCH 5/5] Add reranking functionality and new dependencies - add reranking feature in the Streamlit app to enhance search result accuracy. - Added new dependencies: transformers==4.51.2 and langchain-huggingface==0.1.2 to setup.py. - Created a new retrieval module to handle vector database interactions and reranking logic. --- interface/streamlit_app.py | 7 +++ llm_utils/graph.py | 76 +++--------------------------- llm_utils/retrieval.py | 94 ++++++++++++++++++++++++++++++++++++++ setup.py | 2 + 4 files changed, 109 insertions(+), 70 deletions(-) create mode 100644 llm_utils/retrieval.py diff --git a/interface/streamlit_app.py b/interface/streamlit_app.py index 395b2b7..7846eed 100644 --- a/interface/streamlit_app.py +++ b/interface/streamlit_app.py @@ -28,6 +28,12 @@ def summarize_total_tokens(data): return total_tokens +use_reranker = st.checkbox( + "리랭킹(Reranking) 기능 사용", + value=False, + help="검색 결과의 정확도를 높이기 위한 리랭킹 기능을 사용합니다.", +) + # 버튼 클릭 시 실행 if st.button("쿼리 실행"): # 그래프 컴파일 및 쿼리 실행 @@ -38,6 +44,7 @@ def summarize_total_tokens(data): "messages": [HumanMessage(content=user_query)], "user_database_env": user_database_env, "best_practice_query": "", + "use_rerank": use_reranker, } ) total_tokens = summarize_total_tokens(res["messages"]) diff --git a/llm_utils/graph.py b/llm_utils/graph.py index 37e1d1a..112b25f 100644 --- a/llm_utils/graph.py +++ b/llm_utils/graph.py @@ -14,6 +14,7 @@ ) from llm_utils.tools import get_info_from_db +from llm_utils.retrieval import search_tables # 노드 식별자 정의 QUERY_REFINER = "query_refiner" @@ -31,6 +32,7 @@ class QueryMakerState(TypedDict): best_practice_query: str refined_input: str generated_query: str + use_rerank: bool # 노드 함수: QUERY_REFINER 노드 @@ -49,76 +51,10 @@ def query_refiner_node(state: QueryMakerState): def get_table_info_node(state: QueryMakerState): - from langchain_community.vectorstores import FAISS - from langchain_openai import OpenAIEmbeddings - - embeddings = OpenAIEmbeddings(model="text-embedding-3-small") - try: - db = FAISS.load_local( - os.getcwd() + "/table_info_db", - embeddings, - allow_dangerous_deserialization=True, - ) - except: - documents = get_info_from_db() - db = FAISS.from_documents(documents, embeddings) - db.save_local(os.getcwd() + "/table_info_db") - print("table_info_db not found") - - retriever = db.as_retriever(search_kwargs={"k": 10}) - - from langchain.retrievers import ContextualCompressionRetriever - from langchain.retrievers.document_compressors import CrossEncoderReranker - from langchain_community.cross_encoders import HuggingFaceCrossEncoder - from transformers import AutoModelForSequenceClassification, AutoTokenizer - - # Reranking 적용 여부 설정 - use_rerank = True # 필요에 따라 True 또는 False로 설정 - - if use_rerank: - local_model_path = os.path.join(os.getcwd(), "ko_reranker_local") - - # 로컬에 저장된 모델이 있으면 불러오고, 없으면 다운로드 후 저장 - if os.path.exists(local_model_path) and os.path.isdir(local_model_path): - print("🔄 ko-reranker 모델 로컬에서 로드 중...") - else: - print("⬇️ ko-reranker 모델 다운로드 및 저장 중...") - model = AutoModelForSequenceClassification.from_pretrained( - "Dongjin-kr/ko-reranker" - ) - tokenizer = AutoTokenizer.from_pretrained("Dongjin-kr/ko-reranker") - model.save_pretrained(local_model_path) - tokenizer.save_pretrained(local_model_path) - model = HuggingFaceCrossEncoder(model_name=local_model_path) - compressor = CrossEncoderReranker(model=model, top_n=3) - retriever = db.as_retriever(search_kwargs={"k": 10}) - compression_retriever = ContextualCompressionRetriever( - base_compressor=compressor, base_retriever=retriever - ) - - doc_res = compression_retriever.invoke(state["messages"][0].content) - else: # Reranking 미적용 - doc_res = db.similarity_search(state["messages"][0].content, k=10) - documents_dict = {} - for doc in doc_res: - lines = doc.page_content.split("\n") - - # 테이블명 및 설명 추출 - table_name, table_desc = lines[0].split(": ", 1) - - # 컬럼 정보 추출 - columns = {} - if len(lines) > 2 and lines[1].strip() == "Columns:": - for line in lines[2:]: - if ": " in line: - col_name, col_desc = line.split(": ", 1) - columns[col_name.strip()] = col_desc.strip() - - # 딕셔너리 저장 - documents_dict[table_name] = { - "table_description": table_desc.strip(), - **columns, # 컬럼 정보 추가 - } + # state의 use_rerank 값을 이용하여 검색 수행 + documents_dict = search_tables( + state["messages"][0].content, use_rerank=state["use_rerank"] + ) state["searched_tables"] = documents_dict return state diff --git a/llm_utils/retrieval.py b/llm_utils/retrieval.py new file mode 100644 index 0000000..dbd1d2b --- /dev/null +++ b/llm_utils/retrieval.py @@ -0,0 +1,94 @@ +import os +from langchain_community.vectorstores import FAISS +from langchain_openai import OpenAIEmbeddings +from langchain.retrievers import ContextualCompressionRetriever +from langchain.retrievers.document_compressors import CrossEncoderReranker +from langchain_community.cross_encoders import HuggingFaceCrossEncoder +from transformers import AutoModelForSequenceClassification, AutoTokenizer + +from .tools import get_info_from_db + + +def get_vector_db(): + """벡터 데이터베이스를 로드하거나 생성합니다.""" + embeddings = OpenAIEmbeddings(model="text-embedding-3-small") + try: + db = FAISS.load_local( + os.getcwd() + "/table_info_db", + embeddings, + allow_dangerous_deserialization=True, + ) + except: + documents = get_info_from_db() + db = FAISS.from_documents(documents, embeddings) + db.save_local(os.getcwd() + "/table_info_db") + print("table_info_db not found") + return db + + +def load_reranker_model(): + """한국어 reranker 모델을 로드하거나 다운로드합니다.""" + local_model_path = os.path.join(os.getcwd(), "ko_reranker_local") + + # 로컬에 저장된 모델이 있으면 불러오고, 없으면 다운로드 후 저장 + if os.path.exists(local_model_path) and os.path.isdir(local_model_path): + print("🔄 ko-reranker 모델 로컬에서 로드 중...") + else: + print("⬇️ ko-reranker 모델 다운로드 및 저장 중...") + model = AutoModelForSequenceClassification.from_pretrained( + "Dongjin-kr/ko-reranker" + ) + tokenizer = AutoTokenizer.from_pretrained("Dongjin-kr/ko-reranker") + model.save_pretrained(local_model_path) + tokenizer.save_pretrained(local_model_path) + + return HuggingFaceCrossEncoder(model_name=local_model_path) + + +def get_retriever(use_rerank=False): + """검색기를 생성합니다. use_rerank가 True이면 reranking을 적용합니다.""" + db = get_vector_db() + retriever = db.as_retriever(search_kwargs={"k": 10}) + + if use_rerank: + model = load_reranker_model() + compressor = CrossEncoderReranker(model=model, top_n=3) + return ContextualCompressionRetriever( + base_compressor=compressor, base_retriever=retriever + ) + else: + return retriever + + +def search_tables(query, use_rerank=False): + """쿼리에 맞는 테이블 정보를 검색합니다.""" + if use_rerank: + retriever = get_retriever(use_rerank=True) + doc_res = retriever.invoke(query) + else: + db = get_vector_db() + doc_res = db.similarity_search(query, k=10) + + # 결과를 사전 형태로 변환 + documents_dict = {} + for doc in doc_res: + lines = doc.page_content.split("\n") + + # 테이블명 및 설명 추출 + table_name, table_desc = lines[0].split(": ", 1) + + # 컬럼 정보 추출 + columns = {} + if len(lines) > 2 and lines[1].strip() == "Columns:": + for line in lines[2:]: + if ": " in line: + col_name, col_desc = line.split(": ", 1) + columns[col_name.strip()] = col_desc.strip() + + # 딕셔너리 저장 + documents_dict[table_name] = { + "table_description": table_desc.strip(), + **columns, # 컬럼 정보 추가 + } + + return documents_dict diff --git a/setup.py b/setup.py index d5e4805..d02bc0f 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,8 @@ "streamlit==1.41.1", "python-dotenv==1.0.1", "faiss-cpu==1.10.0", + "transformers==4.51.2", + "langchain-huggingface==0.1.2", ], entry_points={ "console_scripts": [