From e32378e660a4b4c7b99eff928f3e55c4467261f9 Mon Sep 17 00:00:00 2001
From: PedroDnT <pedrotodescan@gmail.com>
Date: Wed, 24 Jul 2024 18:10:22 -0300
Subject: [PATCH] chore: Refactor analyze_model_performance function for better
 performance

---
 .env_example                      |   3 +
 __pycache__/call.cpython-312.pyc  | Bin 10553 -> 17471 bytes
 __pycache__/utils.cpython-312.pyc | Bin 10355 -> 10355 bytes
 call.py                           | 169 +++++++++++++++++++++++++++---
 utils.py                          |   5 +-
 5 files changed, 161 insertions(+), 16 deletions(-)
 create mode 100644 .env_example
diff --git a/.env_example b/.env_example
new file mode 100644
index 0000000..ab6b016
--- /dev/null
+++ b/.env_example
@@ -0,0 +1,3 @@
+db_connection_string =
+OPENAI_API_KEY=
+OPENROUTER_API_KEY=
\ No newline at end of file
diff --git a/__pycache__/call.cpython-312.pyc b/__pycache__/call.cpython-312.pyc
index 6efbdbf46a94171efc78e46883b8cb0e4aeddb5f..bba3c1d857434874412acecd3f09dca5274e2b67 100644
GIT binary patch
delta 7737
zcmb6;Yfu~4db`p}NJv6LfGqLy0^%X_umPK3jGcga_(>d-;0Iu3(Joj-NMv`#LtR<i
zncQ34_$Ih&Yfm0Cq|?qNO&`j5GS&TYr`(%NLYj8EvdD#K_~A}(e{?$SD7G_6d}rEz
zXQh>dbDi|=pl{Ef^PTT}=R4o)eEj_UKP=+~pBW7K7(CxN-<ec*-z>1=Z@gcB2rr;@
zjEbqW!3b?)D#P8lO{=DhUm-5n(k0<zxCMI3v=;cTTf`lE?!1W4sFeOUHFrey2Y85t
z=THmT<E1={0fwCL@mEQ=*X5gGNRFp?W`^<d97%a;a)R<uUKey<Wf<skd&xzgpCxBm
zhIYGnx6j*>kBD`&kOMxKpCf(V%$RqI<gYSh{dtD-dw7cWk!L>qo0t7=+7}{eiX)$$
zWmxLNpMCfN!;!~*Uf%8XQ_x>eHViR5c{0V$B<q`zN8LbCnuONn93;>{3(Qc!6+^LJ
z80W|thG*Tb6ju^!mk_@!!_C4RK=yMy8Gc){(Az}LyJ6cDIVm&#J2?GbM(>npoADk=
zOcD*mAMF44B@^z=UoCD*vBA(1IhW7n=9ut*=Qo)_+$pC0z&*1-x%e=tYi%I=e5BXM
zliW-0S&)JoB+rrvh4L<vaGVTF@(eq}-8rW-<44gEHIwHV-p_h7vXD-*-#g`Tb5~RH
zY9@z(Dg#XLK9Xi8-ChRR3jdw1K17~Ef<t-%vX{ipQqV-JqBEGIJbuulMbb;nFfy?V
zR~fY0B*U^uG(b~6r6!W{%u|b;#PJWvOVW;~a?~Sz>@P8k>BDX!C;U9=qC6hb&6CrJ
zQF)>8i~2WA6<{PwSdt)3nxzvDd+|G3gI$Y1y3=Agq$s=sgJ<E#eW*-f-Qlh6_{o~q
zt?G_gbw|9q^J_z1eZd0^z?XWgWbexH+a*aIR#===V|9&R=Mfh3mly!}v{fgSSasvq
zO1z{gi2>NQRfLna1#8n+$`o3mIjICfqQ2%UMDWg1DUKVOzLkc*{TdD3d9Ca<T#>H|
zS5`FQy?0JkI26iGK_9+X*)<Y02nJd=hqHD39$;B|XuV)~6Qd1+0${=0s`v0zKaSDH
zput<1gK0|RD0kz8U#r}U7l!|~(h>gOy&BvW){yPB=Ae-;OD}=(NP^NsoB6$&c2W36
zvWl_G-`O`^EcvAp1cN3)IsIfDi-c471e`t6s9C_nWL0rUPzZU`j_fjWi?-6X_Y|ow
zLGyzc+8@Nw2?oI^n1n*XOqafupB7gMhGQ#=8~p^vH_A&-H-%rTs%j?XZ@yY?O)Bgd
ztAbMiNnXNk{M9_RpuB|5<BqcMPSteCp-iYe6g$NvG&4RL93jsnRNM^caG>@=%9nMK
zFL;@SS;obKLC#Q9UN`TjWmBsU=<2~XS#l9noeWr8$YF*D7b=-qy-c#cg`EP1a!7)K
zBT^+PT9ZCM`1lN&@agKsqDOh>C@<q7d*nv%qwrQWt=z-*hmX~i;XUEYHLay=H_%O}
zCOtliPpDW#C!w677TCV<YR%~)CmPkwQY^>NPMYE=_H_7vYRd7saG5<=G!3KdO9*%X
za76&Z2duX6NA~iXKY@?isMiB0x`b_;i<eGsmp5*ex5mm_<K^uSu>E0W?Z1!dO1G``
zTh`{7wRw$-TMukmJ7U(3xV3Zn<hIc&lE-4kV>_l2(RwuMj*f2V#OE)H!<WQMFN+p-
z)5L9;*WJ|qMqelvACB6i7V+r|qWR*c{?fL!dii9SsQb6PllXp>?%_oEzv>QzKDHe@
z*`oNgrSOzO^^Z6X(3RFV;1t7D@B{CGJ*WiZyM;|*L4|<zVdIxU{gr&$AR$oWg7Qu5
zZAI4ncr;6T1mzSi_Xi0cQB3s{0{&iWda<BNP_fl~US=?3?LoDm6x6f=or<8C)`lmQ
zl_NLK=B%xJeC-=!1fa5ec%4jcS}(V<Z$Uv5f31?OYZ&8o;g!nr@KU8FG>kda6a~A>
zN8x3V@k(0Ekq|Z@WpGoTOeO%4S5h*HgjmCqzDe?UpY!<0S<(fnpUN{pL#80=BW>n#
zf_D!)#dS&aQ!|_rfrc|cCN%2y^5c#!5`BD!CIp*V*5_ilG&QeNB26~HB$zYgpP7IV
z37pLMU^dIRAhM-aM{z2h-bhR{l<R6PzcP0yiAqJ(G>5ZvOCM<Z01F|oi|ipYAFxd?
z4@nBL&TQh876}Bc|A#OFMdwnxK{QhMp)N9^PBjCyVAi>mr_zU!vh5U77x}ycPw3rK
zULVUi-CmkmaA+hmX6xa+SriCaWPL31_Uw-VB-FsUi{jazz=tD`MTW{YqX8r333;43
zO9CqGbj{7MRR{+ZA4@3cNe-zh(hhjoI@DPKr|isVNDc+*WZAa>=bz!nAr}BK+k)+}
zEj@eBTyY=ESM}k`>K%<iG}WvQtagUPf~HMP^Ny`?ZDj34+;(WWV8_z3b}c#|w+t@p
z{#9q$DQsLj9WU%$?%gh{TJBw5xPE3^U$&*Mj_Ipc+dkLVrR~2iY8B1BpX-k&OVA=;
zS~1O@9b@6^i`N%#?~U-A#%@v5{Tt{It_!bT6EziV18^5g6y)sRNl&^mh7O|y{3Mr<
zfW{vZx4`&c!TyM#6|Z1He3_WUY2_;#<R+$bX8|sHb{n05(Q3)AF**;$!}N?x5~>XN
zj6agS2u#zUX|xtR5ilGC4*<&{#vXg-Fb=<A7BE4XA`c<0JcK^c^c(xYO<-W_`BaN+
z>pe7$td@ji)K7Vwa<b*5-C#&kg6Q&j{4;<6(S;(1dd~KP4M;s;Tqs}6kzCeJWXl3&
z=?P^FKL-_rOg7}28CY`G%2+AAg;)t`==W-w<WD5<Wcv+~FU&&R0&3X>Qul=??ip=1
zXRDmF!Y-vi3W03s!9f7Gu&j3$F!|J`(-9#YRp@0eiw`*ztQn@TK>%RHIL9HaNt=`j
z3>p##cYtwnaQE>}1st>B)SMJer-N@xx5o$RF?X!xan!GX3|9hR3EMGRZx_BQY#FO!
z#;UlnM%2`Ro}a?7U0XnbN!o%2ILO_?bVdOIBmK}g4I2KQyd_Fa7SPJ?b`iAdaV!XB
zySobMyxguJCWN0Usz5PFGZe@h+c&mPa5`BE77h@o9k7gzHjG{w8y|0UjE#?>renNu
z%n?wGwLZdM3}{Ddj$IpTJ}Qhml<YiAODH-21iJ_=2hPsHD_6t?NU*cc8H#<0_RV_(
z^=SczzANF~nBO2mZU9Rdav*cp=T@dyt5@l`v2mF|J_hwn-DX!0#*GcjL>fo5?2nX0
ziMa7-rfY@1-Mb1swb>reYHw%_x|)}XZDY~vf$M=+1yNHbIj3D`h622FW=mlFe~RA%
zd!}(@>OmY@3aO>g=Bgu}lZ=4dV6`;BIh<1oN?LLfT&0w{gn$Tm7|2l`%WE=TW?C(w
z>;l8!T2f?am9#jK&4Xw)qDAMuopVXa8P)JwnUp}hho{!Z9$P%EZ^-p!lN*Gdn)*cS
ze1V`bT37lQy(x^=|2d9wuRiQ`wM#LDxfJ72lL@L7A?$jApq2^WRNT-LSTHZBrm>(#
zAi(cJ%nNE22un*F&|YQSV7`zq<Vl{lNYDl~IeX0D?q(Fnyg0aG&C<~<ZiqI1NA%g<
zn-B}HYttM<C=P06Hh@(v0s9DQcQ3;Kz`<pgrVC-^61d(78X%DoE)E=q?^P5ROLxtp
zY_c2Fr3T;@M;8Y`d~nLS!}=W3CDLSzpxaG32V+kT6^o$XjVI`Sq!9E=RXO-pNvmu?
zH?)h{AKEWig)yUWy3FC}a(eZdUcI0b^e^>+pN1_CV|3XtNEk;_R?o{VZD<~=!FZEQ
z6fTK&9LwRfG{fod!Kh1d2|~1d#RU2&T1XGje=2Cmp&u&GE<jiC$oET+V379#!px{N
zu=c$gfffd3H(WEoex>(3F0<jN9Af_rXtO>9EC2%bA*lUjvOg%U@u>VJ-$CU(l|`W>
z(A>vN_@}z$LSUvY2^C7#4?&nkwJBCko*&^|0rm5XvygcQ3bWb08)}&xbeV>!nOP45
zSxCssL#p3JvJ<d~Lm6nyDNnkk>SVU&=ko0?O$KVu_+6}T|2dYK_PYZ<RG8#^9yg7O
zoA7szVckC37qC5smK|bCk;@9~d4WTTyqq|t>sY{N3ncQK-_27K!0cngze7@iqMkX*
z?UArkg-^8Ey=lqy%QaEfH=mJJ2(Df15yXVLxwWmiy_Ic4AC>7j$NelTUxl&^Gz658
zYGnw+{^#62sP;<5hYXtyf$FnvZuid8M=YOH7a0m@n#fBq2k7nZXIVHxxMa*RtW><n
z(1JWssx4&r9ytqV#voR0safgkUG_o*1hp-sG}uKtbg5enN?{WU?`%Q|m(m2mFNUD%
zyns5DkR!6!pvC?Ofs+V8wu3p02_*=OeFJrzMxY8n!j!6yLyDEIk0(qrv(B{W6Y5kW
zVa}vkbY@JS#tv~#M7<W#EXu*7Ij3|@Oq94%M<88TGr4ub0(r0`^xggJIO6af0332u
z=%ST<mQr`FVw4T1b9Fi-=jegvCuWJc_%b>H(UbcKEb|*E$`lr_T)bJatll=7S30i$
z!7`B~I<=)c75gKWh$dcfczNiHqAIbrbG;x|J0y<KBC~LJWKk-|gu0_6(G#Mjf73Lu
zv&VL8?&jQz;ojb6v85-rw|BiYwzpr@mrFOZp7pwQ>qenyKE0_wv!gFu8F=fUNFIon
zwTh)}qPcxje-L`b;@ZD(iTk?Zbw|Y7Zqa;nQ-4hAwu`lgA`_99#oEK7y-PG7+0=L6
zCqe`X<=6709y^6!#(!OPIXV}sZi{q$Ufm^DT^99~0PmIUT|TpIZMcuA45cf?7j+#G
zK6)zl)L^{s1l&Hii|WLE-FJ(+w`=NG&g>N1#Paq?U$iuKuup977fS~=iwD0bwk35i
zJ6V94%0*+1wD*Dax%JRh(emu3>73NjD4x3f>!#6l``Yu7eevd_cbi7V#xc<{zG=FW
z5soB__Q*h_GujsQM6ZbEGn@Le_v{C@><44^gHa+f7afT><8}~S)pl*`R_&o!?V)IK
zl=`_Ps##Zy9fR@O6QaIaA{1(i9E{XOm16gE;*s;Bd3aO*yd*BPg~IpID(&LaFNnuR
zMDvAB{l$AmyJ&0@4-AM0h9sW5q8-tO^@fdu8+D@PxlPmg45LXxWvJX~fTKJfZ|D<E
zwL6X$aesf@F(8`ilKGge1;~W%S_f8IMg7o5FMx0F_v2XgDE{qzE7m-QfAi3e6<6NJ
z6hH^n?gls1>kAtT;>9uX!nkO;vT1UrSNmf_QUzGQeVC7#Y%l?|52-@)+T_~B$i*lf
z9a%s8Ps8GK=RX_SI3b!}*wl~g6qVfSxEbpB>Dk}HTy5ziZV@czrw0d*V!u@MRKn|5
zq;XJ5{A#}y+8=5TAlRuN)Z~9epq`H^dR6fH*lrm#U>|p>q5TPtBdE+r(6Dc?OZmxD
zQqN%=wY!w4|ETuFQRRlZ=|rb;qYV$Ck4_ap$??K%j-VFYT*CIm%d`|k69l?i!d+z+
z+55`~umDgQnVOyTEG9~k(ZYZ5X5k+|PV`?G=ft8jRg+2Nr7qu`L&d(0;t%_m2z&q_
zp#prylX4*FZik%5Gby)We9S+vzku=ogdZ0M0B+!ZF|M-TSLdtBA6mOrrO9hJ*4~3}
zwGG7D2E@Tpal$3G4aD2%r6IAXB}VLr|BzHRO{|@WRUF!?I1;Nkve`Wnueh)@w9*kH
zN@3L84kxKPhcBzPb*7b}t>S&L;(d2@`?l?EkwY>2;Vt{qG5gc&1#$bc%R{#h-PKpY
zK>~hKgXv2}!X{mP9QEPW`gct~1|AbCuYYECkzIp!_&@5;wxEk1`wRle)2AXt6KYAt
zFI449RlwdttzU#MH<ado22F5%822!It>IFr2JaBbwg(uzlc(^LIBo{ht%-1vhIg+d
zG4#FeO1(E|ad<?$I4Ta0e(HT74JAhu?Rag3hzv(-A{XB;NMi80-u6lNFS{S0W^x)Y
z$1B(P$nkd<lNdDD+ojh=+rM;w3Ml2cy)xN7f#apCy+1qi&Y2{J+L7~rKlc9EL#ZuE
G;Qs(18b-wc

delta 1044
zcmZWoO>7fK6rS1jI*IMIz4kgG8z(Vd8`~uO*x^qir36<hAu53eS^@;Luy$IDtfTct
zB+;6hTZEu88me*tiK;!2A|#_#)zV8rh(d%^*-E7ZUpOF6TsBHj#EltSu8{hq`R04y
zd-LAR?9N|v@Zp!v7c6T7*!qJ%rT+<DcW!_?9z=FR%M6V1#@}#)p{19Mv8(Afh$<~b
z<7e9zh&aa`M?g~KKstMVDwmScQ%A2AlR#EMG#fX@SPr5quA44DE?fYnrNiA^IJmaQ
zNVr2SaMt*`eJ90@;n+bua!=@5QnPWpqYn;t#Fm7u6=Cb5uzj6vida_xh7|{3+vbu*
zpJ@jiZ(2c@xZY%GwXFaQ*yRM127<UwLtn@Q7?wF7YVVxh5PC<~(>R++<3eSJ_>L4b
z<~t!|L+@+h_v<8N{ND9FwEbxT;72eH@(`_TYefZ*&0VC%1OQG`0OYAWt<&gwpU=1v
zXr^E{x*hDdDp0oPSu`B%h7L3=#$g+}EcS)jyi04x=c}34(O@#Gb^gbl=%u(32GQk!
zmvfP1*-bF-*6Ga0DpymvhHCfJe2)$@VI+mxV1!g~d1d|{-X$@;IrD$5rLOJ%IM1%L
zx=VNKobHi*w`})deLVz4>T0J2XtBBvKxQ-U<V8Z*xuvV2X@LeyQz4)-5Xkt{qu}bV
z7r`_=3NFG}D>@#|FthaGZ3Adz(~Nhv>4>DHE(8V|<iIR?wMp)7RX@YEs`l}el2Rq@
z+pLnSTC~)(R;ARGx*wg3#C=sJeIYApRYuKDDRPybkfzlnx*r+xo~f;!lvMmPo|Pp{
zQV$_})DN>L8qM3&II4~l_y(1tpNQ8m{Q-|$55BDeSoZLRp=E#1l0RPY#~1y5t00Qp
zu{ZubpBIOh_*jLH-QBpz_b%~?3ZM9w-|_tTTnjq7`L%U#l(DZJLwkF+^H1aZ2CdYB
zl^Yb8g?5NB7DjIe=3SR?qbj3JO-#;IDP<D<a^Q>5B-zWS1O^F22=o#lZ$kZ!z%|sJ
z6l}LJC0`_Gpk#9NTo>F`4kT6qp3VJm2tv=@V<oYaD@Eten1J|?&e!MXa(U#d@`+RB
oky8uGYAs}*q<UevWGRjOA(l=&aGC(Sj}p)NANQ{jsz!064{}NlI{*Lx

diff --git a/__pycache__/utils.cpython-312.pyc b/__pycache__/utils.cpython-312.pyc
index 8f3025496ca79aa2201c7e4bb6a15a9d6bd98e73..1eb46dd348a6cf6ed0f586ed72eb9531d3d6f861 100644
GIT binary patch
delta 38
rcmewy@Hv3@G%qg~0}$+wT$m=ak=Ig{@xf+yRTX9qSw`U^bD#tO>8c6i

delta 38
rcmewy@Hv3@G%qg~0}yciUXUiTk=Ig{@y2F%RTX9q8AjnEbD#tO<?ac-

diff --git a/call.py b/call.py
index 360632b..6f034fe 100644
--- a/call.py
+++ b/call.py
@@ -46,24 +46,28 @@ def create_prompt_template() -> ChatPromptTemplate:
     2. Start directly with the analysis sections as outlined below.
     3. Provide all sections in the exact order and format specified.
     4. Use at least 5 years of historical data prior to the target year for your analysis.
-
+    5. Analyze both income statements and balance sheets in your prediction.
+    6. Focus on predicting the 'Resultado Líquido das Operações Continuadas' (Net Income from Continuing Operations) as the main earnings metric.
+    
     Your response must follow this exact structure:
 
-    Panel A ||| [Trend Analysis: Analyze relevant trends over at least the past five years.]
+    Panel A ||| [Trend Analysis: Analyze relevant trends over at least the past five years, with a focus on 'Resultado Líquido das Operações Continuadas'.]
     Panel B ||| [Ratio Analysis: Calculate and analyze key financial ratios over at least the past five years, interpreting their implications for future earnings.]
-    Panel C ||| [Rationale: Summarize your analyses and explain your prediction reasoning concisely, considering the long-term trends.]
+    Panel C ||| [Rationale: Summarize your analyses and explain your prediction reasoning concisely, considering the long-term trends and focusing on 'Resultado Líquido das Operações Continuadas'.]
     Direction ||| [increase/decrease]
     Magnitude ||| [large/moderate/small]
     Confidence ||| [0.00 to 1.00]
 
     Additional guidelines:
-    - Be precise and focused in your explanations.
-    - For Magnitude, use only one of these words: large, moderate, or small.
+    - Be precise, focused and concise in your explanations.
+    - For Magnitude, you must use exactly one of these words: large, moderate, or small. Do not skip this or use any other terms.
     - For Confidence, provide a single number between 0.00 and 1.00.
     - Do not include formulas or calculations in your response.
     - Use '|||' as a delimiter between section headers and content.
     - Ensure your analysis covers at least 5 years of historical data.
-    
+    - Return responses in English.
+    - No need to define formulas or calculations in your response. Just mention the ratio or the value by name.
+    - When referring to earnings, always use 'Resultado Líquido das Operações Continuadas' as the key metric, but call it just earnings.
 
     Financial data: {financial_data}
     Target year: {target_year}
@@ -71,8 +75,6 @@ def create_prompt_template() -> ChatPromptTemplate:
     return ChatPromptTemplate.from_template(template)
 
 def get_financial_prediction(financial_data: Dict[str, Any], n_years: int) -> Dict[int, Any]:
-
-
     """Calls the prompt template and returns the entire response in a dictionary for a given CD_CVM."""
     try:
         print("Starting get_financial_prediction...")
@@ -105,11 +107,11 @@ def get_financial_prediction(financial_data: Dict[str, Any], n_years: int) -> Di
             filtered_financial_data = {
                 key: [
                     [{k: v for k, v in item.items() if k == 'DS_CONTA' or (k.startswith('20') and data_from <= int(k.split('-')[0]) <= data_up_to)}
-             for item in statement]
-            for statement in value
-        ]
-        for key, value in financial_data.items()
-    }
+                     for item in statement]
+                    for statement in value
+                ]
+                for key, value in financial_data.items()
+            }
             prompt = prompt_template.format(financial_data=filtered_financial_data, target_year=year)
             prompts.append(prompt)
         
@@ -164,7 +166,14 @@ def parse_financial_prediction(prediction_dict: Dict[int, Any]) -> pd.DataFrame:
         direction = 1 if direction_match and 'increase' in direction_match.group(1).lower() else -1
         
         magnitude_match = re.search(r'Magnitude \|\|\| (\w+)', text, re.IGNORECASE)
-        magnitude = magnitude_match.group(1).lower() if magnitude_match else 'N/A'
+        if magnitude_match:
+            magnitude = magnitude_match.group(1).lower()
+            if magnitude not in ['large', 'moderate', 'small']:
+                print(f"Warning: Unexpected magnitude value '{magnitude}' for year {year}. Setting to 'moderate'.")
+                magnitude = 'moderate'
+        else:
+            print(f"Warning: No magnitude found for year {year}. Setting to 'moderate'.")
+            magnitude = 'moderate'
         
         confidence_match = re.search(r'Confidence \|\|\| (\d+\.\d+)', text, re.IGNORECASE)
         try:
@@ -191,4 +200,134 @@ def parse_financial_prediction(prediction_dict: Dict[int, Any]) -> pd.DataFrame:
             'Model Name': model_name
         })
     
-    return pd.DataFrame(parsed_data)
\ No newline at end of file
+    return pd.DataFrame(parsed_data)
+
+def get_financial_prediction_list(CD_CVM_list: List[int], n_years: int) -> pd.DataFrame:
+    """
+    Build one combined prediction table for several companies.
+
+    Each code is handled in turn: its financial data is fetched, predictions are
+    requested and parsed, and the parsed rows are tagged with the originating code.
+
+    Args:
+    CD_CVM_list (List[int]): CD_CVM codes to process, in order.
+    n_years (int): Passed through unchanged to get_financial_prediction.
+
+    Returns:
+    pd.DataFrame: Parsed predictions for every code that yielded any, with a 'CD_CVM'
+    column added; an empty DataFrame when no code yielded predictions.
+    """
+    frames = []
+    for cd_cvm in CD_CVM_list:
+        print(f"Processing CD_CVM: {cd_cvm}")
+        raw_predictions = get_financial_prediction(get_financial_data([cd_cvm]), n_years)
+        if not raw_predictions:
+            print(f"No predictions generated for CD_CVM: {cd_cvm}")
+            continue
+        parsed = parse_financial_prediction(raw_predictions)
+        parsed['CD_CVM'] = cd_cvm
+        frames.append(parsed)
+    # pd.concat raises on an empty list, so that case returns an empty frame directly.
+    if not frames:
+        return pd.DataFrame()
+    return pd.concat(frames, ignore_index=True)
+
+def post_added_data(predictions_df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Adds an actual_earnings_direction column and a NAME column to the predictions DataFrame.
+
+    The DataFrame is modified in place and returned; its 'CD_CVM' and 'Year' columns are
+    read, and markdown markers are stripped from the 'Panel A/B/C' columns when present.
+
+    Args:
+    predictions_df (pd.DataFrame): DataFrame returned by get_financial_prediction_list
+
+    Returns:
+    pd.DataFrame: Updated DataFrame with actual_earnings_direction (1, -1 or NaN) and NAME columns
+    """
+    # Data is fetched by CD_CVM only, so fetch once per company rather than once
+    # per row; a fetch that raises is not cached and is retried on the next row.
+    financial_data_by_cd_cvm = {}
+
+    def normalize_string(s):
+        return unidecode(s).lower()
+
+    def strip_markdown(text):
+        # re.sub rejects non-string cells (e.g. NaN); return those unchanged.
+        if not isinstance(text, str):
+            return text
+        # Remove bold and italic markers
+        text = re.sub(r'\*\*|__', '', text)
+        text = re.sub(r'\*|_', '', text)
+        # Remove links
+        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
+        # Remove backticks
+        text = re.sub(r'`', '', text)
+        # Remove any remaining special characters
+        # NOTE(review): this also removes '-' and '=' inside ordinary text (e.g. minus signs).
+        text = re.sub(r'[#>~\-=|]', '', text)
+        return text.strip()
+
+    def get_actual_direction(row):
+        cd_cvm = row['CD_CVM']
+        year = row['Year']
+        try:
+            if cd_cvm not in financial_data_by_cd_cvm:
+                financial_data_by_cd_cvm[cd_cvm] = get_financial_data([cd_cvm])
+            financial_data = financial_data_by_cd_cvm[cd_cvm]
+            if not financial_data or 'income_statements' not in financial_data or not financial_data['income_statements']:
+                print(f"No financial data found for CD_CVM: {cd_cvm}")
+                return np.nan
+            income_statement = financial_data['income_statements'][0]
+            print(f"Debug: Income statement structure for CD_CVM {cd_cvm}:")
+            print(f"Type: {type(income_statement)}")
+            print(f"Number of items: {len(income_statement)}")
+            print(f"Sample content: {income_statement[:2]}")
+
+            earnings_metrics = [
+                'Resultado Liquido das Operacoes Continuadas',
+                'Lucro/Prejuizo Consolidado do Periodo',
+                'Lucro/Prejuizo do Periodo'
+            ]
+            normalized_metrics = [normalize_string(metric) for metric in earnings_metrics]
+            earnings_row = None
+            for item in income_statement:
+                normalized_ds_conta = normalize_string(item['DS_CONTA'])
+                if normalized_ds_conta in normalized_metrics:
+                    earnings_row = item
+                    print(f"Using earnings metric: {item['DS_CONTA']}")
+                    break
+            if earnings_row is None:
+                print(f"No suitable earnings metric found for CD_CVM: {cd_cvm}")
+                print(f"Available metrics: {[item['DS_CONTA'] for item in income_statement]}")
+                return np.nan
+            print(f"Debug: Earnings row for CD_CVM {cd_cvm}: {earnings_row}")
+            current_year_earnings = earnings_row.get(f'{year}-12-31')
+            previous_year_earnings = earnings_row.get(f'{year-1}-12-31')
+            print(f"Debug: Current year earnings ({year}): {current_year_earnings}")
+            print(f"Debug: Previous year earnings ({year-1}): {previous_year_earnings}")
+            if current_year_earnings is None or previous_year_earnings is None:
+                print(f"Missing earnings data for CD_CVM: {cd_cvm}, Year: {year}")
+                return np.nan
+            try:
+                current_year_earnings = float(current_year_earnings)
+                previous_year_earnings = float(previous_year_earnings)
+            except (TypeError, ValueError):
+                print(f"Error converting earnings to float for CD_CVM: {cd_cvm}, Year: {year}")
+                return np.nan
+            # NaN compares False with everything and would be misreported as a decrease.
+            if np.isnan(current_year_earnings) or np.isnan(previous_year_earnings):
+                print(f"Missing earnings data for CD_CVM: {cd_cvm}, Year: {year}")
+                return np.nan
+            return 1 if current_year_earnings > previous_year_earnings else -1
+        except Exception as e:
+            print(f"Error processing CD_CVM: {cd_cvm}, Year: {year}. Error: {str(e)}")
+            return np.nan
+
+    predictions_df['actual_earnings_direction'] = predictions_df.apply(get_actual_direction, axis=1)
+    predictions_df['NAME'] = predictions_df['CD_CVM'].apply(get_company_name_by_cd_cvm)
+    # Strip markdown from Panel A, B, and C
+    for panel in ['Panel A', 'Panel B', 'Panel C']:
+        if panel in predictions_df.columns:
+            predictions_df[panel] = predictions_df[panel].apply(strip_markdown)
+    return predictions_df
\ No newline at end of file
diff --git a/utils.py b/utils.py
index d780852..7faf2fa 100644
--- a/utils.py
+++ b/utils.py
@@ -149,6 +149,7 @@ def get_company_name_by_cd_cvm(cd_cvm):
             return None
   
 def analyze_model_performance(df):
+
     grouped = df.groupby(['Model', 'Company'])
 
     # Initialize an empty DataFrame to store the results
@@ -176,4 +177,6 @@ def analyze_model_performance(df):
         }])
         results = pd.concat([results, current_results], ignore_index=True)
 
-    return results
\ No newline at end of file
+    return results
+
+#get 
\ No newline at end of file