From 29eb45420a178d2da01a34e696842bca4c6432eb Mon Sep 17 00:00:00 2001 From: eric Date: Mon, 27 May 2024 16:31:02 -0400 Subject: [PATCH 1/2] add docs for elasticsearch vector db integration --- .../images/integrations/elasticsearch.jpg | Bin 0 -> 20298 bytes docs/source/integrations/elasticsearch.rst | 712 ++++++++++++++++++ docs/source/integrations/index.rst | 8 + docs/source/user_guide/brain.rst | 1 + 4 files changed, 721 insertions(+) create mode 100644 docs/source/_static/images/integrations/elasticsearch.jpg create mode 100644 docs/source/integrations/elasticsearch.rst diff --git a/docs/source/_static/images/integrations/elasticsearch.jpg b/docs/source/_static/images/integrations/elasticsearch.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bc7c9130430198ec1b94fa30a9c02b75c5ea9f95 GIT binary patch literal 20298 zcmcG#WmH^Eur@k_6WrY)xVyW%1cC&2cMA@|-QC??g9RtJLxQ_I!MT&X_MN-VpS#v~ zn^{A5Jyq4cySw((uCD#I^tJ&&mXVN_004mi01)&Ccsm6+E1BCl+c}xr*%PxeFafwk zq~#$X0Kba{K2Qq)jz=Mh4+a2$KKNkH0MTz{#KtDBCf0WLCXS|#7RCl1#Pr1G&d&Bc zjErt>ZVZkl#)dA2hSnwwMs_xg_9k}r)+UVqQv&ez10V{31c!it0EYy9K|(@8LBqp9 zg9;KH94tHv5-KVR5()|$CO#G#IxYqZ3N{HgE&%}%5fK^|DLE-2IX)o~;cq2CkSH`1 zG$ITPA|W~oI^qB4^7ajY0t5CD>?1gk6aa<-1V;hB^#kyKPY&eoN&2@yfPsSs34{Tm zcpv}_{C}d5KrnD9=(iOBJU9qM21f?X@5#Ty|G!m$)@?hAvUj#?BrIh3!;wy;#JZjO zZ6{jN5gJHB|4XmneLq|oFzw_BJ;BHLCF+QZg(Dn)Pz^~PSEghS@8O8Y#G}|Q*UNbv z{J&&X&AY-jnUQ9+#%E{ZP-3at!EyS$l6fJGCMJh_^W>ShYylVwLmd`%JB<~KOuwLL zX%BMge@e~b(Y*4dw%Gp|oZE@@{Y|u9L?{MR4Y=%Wif3i`l<{zYF-JAuR1ivrdWJyle9z^20U<0j4i4^Kxc} z!+`A+d*#QDa&@T|^Gi2iwa_0CEc%A1W`(?_C!UC8J9;CbB-{t3f0Ti`TkY^b)~0h( z=PfCNU-kpE4k-`MYA_;&Encgb6i=({+|x&@?N=v zE2xw!RRaErsihENis{=Ztl z$Xksc1JS-eQu40PLM`+Fzg$xe?!NLNZ2ZFt&Kr;;7(Bs0PY26ki{bYV z8Hi{sN?cr^x6iMgcz!5qWe))B>Ko&U&i1G|!L6e3V%|Remlvz@shLwF`vq@uDE~CF z@E;Wbj~$WIG{<+eP-RV7Z3|@89QE4_b-`n&z=qAcEC9I;?^#vj^_8wghHlaB9TKnp zzk=|#Ngn6Z&o4|SgUjUqP)nAKNX~QpGZp);Kk>K`HsowftawvsOk_YKJZWc~C_h?% z=WIR4L#ZXm8^ga$-!ntlsuHRGcX7aWzlL~o7~Ig{G4F{`kjn7?t+!D#jZmK{Z6KS~ 
zlD}UlE*AiF;`FmT(8j`(JiO7#7G69q6q#?aQ61InC$#PVXD~Rn&nG8I{6)e@pR(|5 zJ&r%yJBskz?_m9flZ-UVLuMm&IPr4z@`nDVX{>kotZ%s-wVs4W+s4Nc?f>#G?)eP> z02tP+T+3tGGkM`5LlnXZ5u6p{F41^S1g{Ym9m&a&u9AaymS31|ezO8Gb?8F#JB_Rz z?!|AAhkui>Dr+A}&Dams5*)Pdjsvb1KDs!2bN~j0euI#scUD{rd)b3UrABQrmQTr1 z$Gv_uCtjAiyxuThIAufc_Kea}ucv zqO5x->i+Loq_V%YfxQ+7SMhRkWV z!S1iN<(s{dddHTym|?Qf`8?ZcDl^N-WS#)Gz~~K@#RlEz?#Yii^4hdfeSadQ94d@Y{lu+_lP#`E}iZ=@Lqih*Lw%{r|WptQ6S%MwZKmE zjy2v};i5qS9clgF6z%*QQBp+td~dj(Q^P?tZUCU;3xSsDrZ%yGf!$~E{4hdm&Oe&~ zhdx8ng0bvryt!Hf=W?Pi$BFI^+*5xN`L`C}%tMLgg8C?X)6n-2rr#9Eh*$LC!5u~C zm)>Rpo06)l&#BIcD`EGVCs$4`D#2A)nz&XY0_GeWznKAm^0*n3L+wgE)yhk|g8Pc- zMh?F}bouk=r+f?Z7Dr5qhxh4!XkekU^r)%Nev2F&OB)|!3LBqbo@bl%(y%Hf*`oOh zt%7pcaCZI(0|10Kx=!i)yq=495WbSvF&U;Mom6H*AP~p=Qwu6fiadSlkNF1szqm2C zN$9_)A2a)oEE-#;zaf>c@lgjg-tq+oi@Id)qPq)Um`@)!dSBN;b@muMi zV#`a&N{KZ;^uDtC3vGbd`Ww zu9nRGM*wJf3LS4ShTmQ4wAO?@y%zZ|kkxC`F1!4EhcBpDsNz8dlE4erKuNhp{df}1 zAu%iT4uTj^N!bATb0O~2vc}+d#qOHoX{u+$$Cuo*GO}J67eLg0C zh)FT>`f~^Piy5WR13w$lEoP0)L*OIlAM5%JNFRC{Cl-f@iIesG@o{ih6wDT5R5aTq@J7)@4W8q{Vhne!Kj74CNA8218G zl9i;$kG#h|NSgUQnEATgb=WScg9GXPKlkK6+~{*8#sv~Aiu}YD1h`QfS9|l7zw(;@ z>~^4=`C=EGTZ#tUr9y)D;0L-eM|sj4Sqbqd99pF_I7chGwf{!*yQci{3r}jWIO@j+ z_rbb=gXpB^hKY>mHN@bbHa0`5HYv5U-s>z}=Xv|h5(!nGxF?a9yWf>bCjCz(f3>t? 
zIKTZYZH5nroKXqy0ia{#FH`n6kVn1W_}{nz03@nb(rGraEB%m@x_50Nt3H1JJKz6R zsvyX{WIzUhfdRoF!NDQ^b}~WECIkQy83h%MNeGIVgp@@Ios5|cgPhfm0_1VRfILwk zB-k5Z6;(NYbE0ye)hVJOq9LtqJ~ISSxc}8FnvAD`8_vR*m<2$#PRttQmzl)mR4N+a zAec@XI$^{IA?Y>6n)&f7c))WT>kiskP9$Zaqb;SNPUVYZJp(1wH(63c?=c0~F*zRv zvtWn!ql-Yiv5Uc4MIy`8!letsh@)fhGc`oAa zYdhq(iG_j#r0r4P>y}co=$z$`*VvL&dnJNxyT$IDRwx59Pka@E`rd72j1`g%g(0d+ zE1OQ5SKALU+XQa_a_?;EdNOkCCEzYXPJU%kJAduqqG#u8CL%|@Z$LK19`y2;yM-q{ zk8B)r30b8WgT`Y{y!=Hv+X#J!##35f1?YEe#%)#)1$WzK0mm=dY?Cmdl=|@ScAr&C z6tRvh<6UlT`0rUE2^SV+9oof3N6o|MuT>aeuCeen8{Pn0VUK)v$(fQcE2oXB>SZ;{ z4GGunx;x|EzRKzvw_+yVZt}L&<<_5Fq#wVI(vZ3iAd6DP^_sz6;;^#R`4Kc_DQkIF zYq_+z0z%(+)s>deK4wiPy@;1I&~|cwGt`vkFjQIC!t}({_9#%MaZ#XcGPOyxo>v|L zOXh3$KDxQhXz*Y)H+N-xO;7Tu7vN|!VON&bE4;!9h+1MP0}F@&bTyysfqf`nx&|dAS-m1KK!i)SDOuz8_cbFZcC947hT%y>}EK5TsfMEq!dxkpzmC73*P|U8x}lHP0&iE z>Emr z+RSMoL-3Y=RF!sWYCRI1Fo6dxYL(bHX}*h{I#;<$ zke3Keli;nlVtlFO$nk7iETFZNHWlk1d^~F#N#HZSos-V0yawgC26n5wftmuGa!(SbXgGKY;{4-|)odgunh;Y+jFWmKnd z{y-)(G5x`u%g`Z@rSr{cnI)k>lu_|Zr?~7*$}^Ib$_B4yHkaU#dqE^!yLeYkp++}u zaF%7*UYhW)^%50vbmK{wIlEiF(ED_xv0TL#384ungZ^1?1>lQ8z98qUN7e2w82aBP-Cku%^uOqtpuD5Vawx`ZlF|Oa* z@e|S9@D@6nRs}XI+(QC)P@jQp)NLu|!KfhIYm{u#&T4nuu0iSH*ucA~Yn{5=#$u}~ z4Zrg0MtIxly}Pg?v#WF28$i|AdrjFF!)LXLc&B6cfb3>>X18Kx#<#3q;GCWa@rSP7 z;_kurcS&}~O}(a)7<##!t?tQGU)kol`T3IF1wt{?dcC$~DOo&}Z_Km3QZWVJy&TX} zSnj&Aztp_1yH_RaYf8}eiQ+(pe&mCl-!62<3_49Io6L~I3-#BH)x(pW{`DOd-q^PQ~7 zm8zwahRzcV{o+Wy4Pc5w3Cbj8oo}3k8zY;SV8bN}y1dQC|Mln~< zSZPbk2Um%C(?v$GB8erMq!b;Fv`xsTp;_W#2MM?off;>7*(-y{9vGr;v6rEwYWT{sCzg@b;0L@pYL8`!Y^N+%v;UgF-`F@45P&XoUK= zUCV5+4#~KVTrrmAY3WBFl_co(>sCX6_^=HqeOEzfs4bsUlUR}D%Rg$#G(3x$-G}zh zeDE*%fUlLAN;&`KcUKp68bz1q2N61h}+xit{@!wv@^(}i!*0o zreFp^Dkw;T79TgFwhy%rZ3b-ywJ+m{$_+$$1GM}8b|X;$KyWY!NC+^H`}oI6{T;Q0 zKtUy8VumDU5fVm5BUKhLK(}XAa!~o>MuNOeLEvj83f~FV<<6r7Wem}QCSJDnJOMI+ zMO{+|qNK#4EqbERP08RM9&cwr=1AHyyMI8R@HiqSji(Wzk?kB@DVJEx_`~}Pcc@qAlZaF z$;!u=d?OxWVBJoxyC-(IF|Tc|vi40e>HT32`*L;ZwgTr}svR!GUXB;ex@>Qziz?{# 
z)#44n(WK0ArS~x+dK5Q|nA1S{GEGvcJtk7*EYE~Mjm5cRHcXH}Gzrd;Qs)ja;}A9+ z4T*}sWQHfq=+MzABAT=eGJhd%NlT%PL(zvpNlHWmV#x4axMH8WG#+-Fs@$Or3wv?@ zvCKIx3eN!ZJ%RIVsE);VRzsXJa_8V(MJ1frRaEIVWi=kb`_zj|hhq8Q1Vj=SS*0na z4jE5>q89}Rhc?x@A6siuCd_T%-qfDXnuHYf#+3<$#WMGIG(1zDvG%S;>G>tl10DHZ zN&ND!^m_%~KSE@5vtYKnOi*z#Y&=pn#uIe3Y|-(RPe&d~E%P%9i#x}bCAHrGjYCZ@ z0!oiSebSWcjw7hCIXCa8azOQaX^%8q`c*pzw z=gjluC^EbEX;_#F7Iu{mKV0}uzO-hR)9CS5pYm|O9Gvf}OFl&5N?kO+0X%~~@emab zUzDd8_x#A3o8>VxbS**3S&1Jq%$(XZ%$}Jwnp8w;_0_s{5gamk2F+!3|EVRlTx@`9 zfmI9aoHZ|{GHaVaUPsey^miW`8E-SvJnSW2r ztZSJx)0982UsHy_BVdAy3MdL$4rME8evUF*xB4W0_PpFqxCE?{b80#q zn0D!4%Sfk0Afsga%P2u-9uUxfJvjbll#r-Q%*ezfLTD_)%A{lD2!>dl2Z4!v zbZ6&#EG4x8rJvK;zmWTX$gs3jB+d$$nR>KMg`0gl8RMWu9Kch(;4SE|uAmvMzv!KG=QNABdWp*d#QydaD;w@IW^&l3%;@h^svD zEX5IhF3J4zjeU1KjdL?+8DFwD9{P)qG>!^`$|IIFXMfwS!xWo)**7 z(rU+?b}sU=1fYil;J1}XTQh)PbL8~B9f{$+b&PKWa(6p%pix1{W=%OKKW_e)*cLKq zF=&$6LaapdNhPuaPO%icMB#V&(YXHCU}ZhoYqc3Ag9j5Da83rT$~k+|yKjK9Dg+!Y zzdatcki7#nwViwI_EqNOb^l0D(i!wANv6htqY~h4&>JA2xFjCj0^qU))5tUcdu52e7%ee9R5FI(*HQMyzTBED+fAhY2`2AxZso8gpX<~vZ!{U z7BDl)7Vi@LlMY9U6Uvy|z2l`T8`sobKLp2%6|njO<`V6lI|PaFEpu>VP(;0@xy`t;1;ok6|J(xW}I!g%!7k7V3ti~ zY#D~V307cl@>QUq-u?PeE>~6&Gf7+&m-uFnqCw@gNZ5UV?5j$pq36)k;P})o7sQ@J zV3sm8StNm>2bFbv=2$8v2}B7=r1U;o6tZVb^vKlk1kIDt{4CB*FzhIKho|arQ#$Q| z_Ly_y-J;fOVt?jkK_MbdxWFl^=90E!OoNA|mZ0m^!k5=^o6)VN7ny)bI{Sd=_vP_3 zxYyDZGs5^yUx%bLOdF&L;3$S!Q?O-OqonpRocm>G2MWoy z1V5qlu?!&ch3M4Kv*NUSHagR5PhEKRc|3ADW4_qIKli7DPGF7V7K8@B$P7nDjqhfU zi+{kUhiDA)DH(Es*X{m9;lsfy60S(P;G9HdT>fKndaBxj#mdAsW|*B53H6dY6o0^d zRHh7pJPbi=rF}N2xMIvAeT|@hZ`AWvExF=uNK(1>=^B*_caSFIdwrPagPMw0~v!9nFGG_IP_+;H< zlpM z?<0XJ)UjJmK_jt-VR2(WXQ2#PNNw0O=`GmJ8q>-U#7RXUx91@Wb!D*>ibIGVS4fUH z<4tjpR2^rzkY`rzA_`vDul)*5L%?o*F9TDkV1@iuBK<63%GUP?wT5?QeA4;)R73(o z=w&LiFTD>04r-vSxTu(`; zv4CJrX;33zn-$V4;j+??W@Butl>{>V(FVmCqFNbi-`LhNbQ3S>w*&-dvcc^Nc@GR- z%b1H9U2|Rgq-XlGJXLR*WW@ZwuxAHy6jrn%1kFg%Q|70>tb98jKGKqvUoMAL@!{AYcE2X6qohZvj6C2T&&isUqwtO}3!lIB^o@``L7f@7xT^F8eBS%ryS 
zV%HWbxs}m{iLoCO!?$!=?~YXeshF`HVA}l~&=2eyC9$YiAq3rzbgQO3l``xL^hy6@C~Z z{qA@pEzPn7Z9Ah@U=L3u7~dH4JQzUhac@|JlK?t2m3)u5_C7pPEtaoIO9J8N`KQ*> z>mS!Bmp^0p!C!j|EqQup6O-tg$SArwD;VM!GSh0pMvCqIMJ`4>e)&@q8g)qb^4jMp z>j(XW2!icV!E&$&&7u@X^*&OZ8=!$eap&>>o;Y^N&5o|+?v|1%TLN8P`->LN3`e9M4kU^c-hY!Zd3 z*UOekMd%vE0vn=T;q-_bO12S}f>eaUorbo9Uxo%KbU`mYtr|EOHHP0Nj~=rIDZ>IR z}Q;g;Y{AlWrea!yGw zeyu;90=w0?5+h7zdcEp=Wff&tV`8i%M^gbg{;aAzV|EcYXX1g%W0aYSNxcGby{E9* zmczuuBIrj2j}d)gCW$?_uqh>M36w`va~ejM9qaKziT7T{5n09&_fD6Jy`08^9+H{- z!iqzJ4kT=}lk$LFyG&HI31# zI4@6_AhT$yC&_+=D?#kSXR)m!Q&%?epRy3eo2ZrcA2(*Bw8J}4FTHawBAZ#{f66L} znMX|4x|=o1Hn|#}8rFp%PJ8H|@{vhnqA$d$d)A9cTXgXUDoe#iepmVM+ROY|G19VZ zqqR;ZDD+zpv0U=F>2;OGlAY4oV$Ioli9i)GU@pzl>W;>!Lrx;z(w%E>01lN53fZ4! zwsZ6Apz%mChl+guT*^rf=!U0O{mz2!L(hhF7-e|66vicUnq9<47AuPy>HO`24xXK*3N8aw!SpmLc_gQ9H~vdaanIc`Yp? z6-8w9Sw~P3i^X!XVP}-WFiv{9U5ae1CBqCVJFP|eN#{a5jRiwNmCcHx=_>VsR!IfI zBPepeUhR7{|5elGTXj(-ugqsF=~a_5hK6%Yxj}i$ksl*qaaLw6T_2SZELbg%EECy} zl`JbPgVL?Y-vGHS5q8Z_ppSaN8-V8x0CM$yTQRCOj zI=P+#-L0uyFfe{9)I7Z)x~bD;<=5z#gG0BovIp=aAo1NT*!cZQ6l&pny|UQkLS4wi zIGm&N10WT`%JA-hWt->#HGl7I9)taSCi+PDLz7tS&l@pZ$u;AtG(jn0*P4I~QjBA`N@5MJ6E(?Vsb`vC6!i1zrW z!AD{lkwK0(0HjpM_gNlZSKoJPs21J#d8iVSP!eETpFoQ^n{}O$n5IA(j6TM*?V6P^ zc`!r{9cQEY4zubc*?`GupN|;4Y;Mo)q&1zFEhR=L4YNZC0$ZYXv`iUFo6!WKZ3?zF zureqjIh!h{LBV8!OI|7~Qg=eI@~+uYG13d>C||aEsP;Cwg=ZwVW0H@)RaQe(VZ5nW&D!g^0_m8Y_I3;PJfmr~D^$5`%*hft%cHVI@H2)GT^$m~)wwM7ECT_OpgdRVJ`tb7C4 z-@vPW#v%!$I?Wb;d|&DMNlX|RF?%2gl%}uC*mJ&=mZ4d7NEXa2WwnyMQ^z z6(pS>b?2PRfYEK<8mp#ZWYmn-Ju=6IkZTup!$phA%5CXdqb{dt#v`1hkAD{e@Xef~ zVx(3o`Aj*Y`5KyRKO&J)X~lY_YzUQ|>Yv|VylDAeG9SXC7omEoGJ>}CmHIee_1pw9 z*4`%4V;;PpOyLON=hvh0{H0E_{7h@j+yZrYcq>3e{u4`ElN5B`#xDm}MoJ7yvumOj-XC9^n9!^!6fy1fogi(q-f35fSOD&64${b|q7wubSd+VR?!ut+}Q zW&86`G@o33V%GCsGLU=|E(2f@(pR4DocqURAz{SBTl|N8)~M~C$pWX6f|X$vV-bE)Em%4-z}C)cS8)taYisH6C7=A|u@x&j z%IYl(RG}CQ=7J8y@*rD>dqPJM1s{KH=Zy>raRPIUBPlbtqfiLn&kx92eHT&xH8qi@ zM>~YZILPFacjO)9dbmz=MiaBKqYVFy+%(s2^RW*P3IDj6Yl|Aex){dSnc~!FPI~?q 
zDsv>5=Z(gmB%ScAAEG{#gg?%R4}25wGu)JsgiAnS(ogX0IhiIe<%E(BXB%U^>5ctV zJ({Ic@x$Aw;HHHs5i*^bn7>$yanHs5aP=j1m1RF_TaCQ(50FA{RS1gi!?LO*q&YDJ z3V(e$3*-Y|-wHNxM4EtDd$1S_Dehd7@5;R|<&M78$@>zKEiYY?Jvz=GPY%Xrn+p)q zH#p(c#_-OGed*}BsZMg&dENqF^3_ny$ngS-PNGGd2l0WEfhh>Ol>46PDVkx= zv1#ONb%^t1N8ZK8UT73bpc2X1YrX-}hS&y7Pl-p9=`PP#Kl_>z`PJfxdBh?I99p9<1EwHW|P9O#sCtA zg(LXuwJ@T;0bWy}g`uAAc;N9P(&li=zN~;>3whDC@i*{=zX3?o-vIAeh`-57+nx#} z9+-bNjErQ5?qPf=W%Tn#DRh!j)@nCA%s+kldMT5)i_;1J?38N()?l+nU7!K}oINuo z1~8o-Sd(>giiCuCUC7Mo&W)i@LfR8_K?S(*LxS1B+3${%l2*M=KaH zWz*`5i>N+EP<=&_>?!3YDpJctkxc zZbr|}*jEb?#vS^fU~xrSIwQ}wI^&D>hmj|G1f{?eG3=HrUc^#f z{`+G->Yy!AJDcy1gae5SbX9WawP3Q6$EAXeZs<=ibFvczzh0TpjSU+y>c_pojOQ0W zOKYt5KN0z1(AUrS5g!ROQ@?VonhSk03|%EdJbnWRbdL>~hs0B{P%0;&)dtNMJy`KO znIHKyQ;z{}7w+73yc-C<6V)F5Q0X$x)Mt*((WIN_5?paO?fX&k0}^(n zo>TLf9YT()Pwd4_v#&Sk>+JqYD}#3zJauRwXWFF9-8ID z=faWYw^59dDcOKQyjmv_*ijMj#R9k}g~4etiCNuZJ?DeBUqV26z?s`Zo3(oR=_RS; zx|NlmCI3PNmHD+H51bZ?6`cQ_+*P-bS~IaDd<4*wH(Hr_MH1)zGuMiFx z$qXsnfV2uP0b{G4rX-M8^LYCP&@GM)?6gRPBPzs4puZU_3p@xFjW5R9&ta#hty4`v zs>JZq2!9-*`67CfG!%*?VQrSzDU=DP6hu^(n>JUP1U7KLuJOa0Za&GohwYFCcMkeC z_QPWkWxuBkM5TJxc%Ia7F4?V71JwhdGH@8=y(zkg9H_-CVQ@IQDnAfaA*E@k;Xufb z29ZBdwMph^IwG-2liwGmnWYnnA(7qg(CnzNg-<=-HXkSGq_eN0m_ARlRiFqFJr#g_ z_dl;o0Dk8zBL81ar2F5MDNFLz0wDf341!Ei;J8|#uJ`{9GJ(MWhImFnImlE`*PwS` z{*S^0%~#jUck^f9Kn6NLzyC0xB!4~I(hSqz6j(dr|1bdZZEOIHZ@@nksO4|}R6z-k zBVaJZ3I8Q!BAxjC2lBxa2f*z@KH5&%P7eDO~c0O-p7rw$z96Y!6uJrnwe z0QltdZx#MdmVegjE8V{Y0H40!-wmhk=j%y=&;KR7LMNP3{e4Z74A+x<*OU0Uv41mb z9(NX8!T)LSmHmeJuFwB|j|iYqa=q?x^XD>w6Mvvd5-aE)^55K@dh6c5)?0Ib*ZI!> z2xf)60cfG`VSter=OE0xq~|vXjCKVn-HtxLo}@dM%}8nE2SLAn@a6sn`L6Bwu8-Y8 zK0b(tcLL)#wC>%}^BV(53Mfu8oa+76F`s76Z|K+R!5<9J7sLKp@^9$+`-}E(L6=*< zzd$Bc#kD`z`J18cJV$YzzvAB%RUkUipYHx*$eVZraJ~Wl8qRNNVezm3iayQwL&C6M z|D_c44dk=?S9G)@5?L7X>0b;WEc2gWNv$vhGRFV0tbCWCKK|A_WCVdM%scjdV2l`{jUa4h-r3`>-v9+{Kp4@9Pqzd+Uh8f zSe|SLhKHdXf9k#4RhB@2xu&p5I6PDqk`<32tdY-I$Ys#ox8#|=^F?Pp)jVAB!gYhc 
z%f&+=M&4>!dX13?g>S^)^|BXaj1#7($@p-z$fBpbu#&aMk6fpHwq0V!$T(Zo&4y+f z^ON0Bw0UxS8uLO=GMU%QORvc(LW{Hgg&+A8?SxR%joGuide;BMJ~P4-c2Pf%)N=x+A6c+g8Hch@9qK?KJErZ zkq%!gRCCb$OjSkN!+h*5l|yZQ9F~?VFB?6>iasxuD0m6ecLUYr=9-%lX<9i6H(s?j zKyzlFEL}Txsg~Tqst9>HNw4?B&BJcRZP)8ugWI4>-8cXZSB5Y`XUh-Suq5Beh5Pk` zCG(I83VF6c(qs`&nIa$WuZN=K$tn^?(LidjS`6a4?(SO^2|^4bgKi|#&>yzev+-Na zMn6wQWOA@gxI0Ue=w-H_JT~PYRS2CMi_G8k33dyt2}{Vde?7T9XE)=5exzO|sgO?C z%QO-=l3l4j#_J@iXL@iij;Djvy~F0coNAG7U4Jfw+gIp(#?8QEv%`y7NE<65xq#Hq z>!wNi8dYVTRJ&yHjzuZs+oa%{6%xtx*Xx@(AOeYJYzlhhlqWmCr<%`-iG0Dut4kYD`#~ z!_UDy>L@gR1AA$EP{Jy^D*v-bRYpuoiq(!@DiG#@uKa_{i+NII+M^`R$}v~=Xp%&2tQ(JDDGxncl=&o z^~u=H+I!oc+u&F8Zi>m_CjddsJ##DpN?yDh3Xalkt`dX)MudtdNns=h zrMe$584MP|3w)-50261V(#jV7fP~2r-}G|FVVtV!j;gGL{YgcUfD=F#jHzaD$rlj(4 zmX)m`Oj1tDG}jGJ?TdZc1bFj2+}dfFi0W86b)SV!Nb^Rt z8&Rpr9njO%pETw8xAwG-0zYZno_$9iFI5%dwbiix0809jy92!gksP++xNLiRBgwmU}JDlsz6=)XmB{-=-yVR=fC_ zImvV}kCkdmgCz^B-b!uqG3MWBGz26l_u&7q31ntrWg$@7K^#+F9Wf{xeQ~+@Uo;wI z6F(13r|!JwA~88CMDgQ5Avq1Un!C|AfOVM>C`00nURsLxL`tywL+les*udwT32wQti8@_f>;m&_+c=s zOSl1fy3}3GXVGin5ne&(%>fhYi3^xiMnIABzWNwa-WFP zKvL5WN6FwNTJHkyBEQne{#5=EHvCTd`EfpMe8HIg0HWe3&R$O5d|ce4PT4SbXN9B|8y-H5fKA(#hq4r1 zQ_3;H9ZWS_Pg0Lhk5z_9mY%?4YZy~1AP-%=OV^X&S%tE5jt7TGx{l+N@`GjAE%>MX zXR0)RQkM6S3i=kYBJ0aO1f8kP*#qK!(%%yz$^5O4CqnzuhG=!dk9F7vFb(t%fYON- z9eOD_`Hj@evd>(+n=G8*ua}HzOO*A$`YGbVEo;PYxi+$#mZ{Q&kg?GhmUC6uI%w9L z7N=2+HC(CI{cZx7{D0uz>RpG?Cz)+`T7#r^nqSgcaWljxo z_SIJJdwQWmcuF};RLLbTPt`E~P`y|q_d=4TXl6$UtJL0}Aez#5CxjG2$yIxDfgHKL zsm?sAbU;cSfIq2-NA4<{-iV?NsL6IfgPRSkA=Xv!&HyyUUs31#f+sl?9NqtH7S14pR1N-zP!)~1(}j_ zDNtH@L%O zuEJ;E7DCWon16;6LUKAXmpQW9=gAfvzewUGmV=~DJr9EasnFkD&FsMtL_{Qel1(fL_ZOKQz0X zD3Jh#P)E)RMm2(-1V2O=k%?!sAy-eR7~@|u@8~<`7lNy=S!zv6l*qVCujpb=t(8c_ zFR~A!Q3$^ZCMTxf4MgR|dcF^==FS@ZA&l&!d)v*T7#3fQY-W5lXR1jYVrG^Qv|@XV zlg-tqPs#70xC-d?Ba)-+polLbK}K5@>c5bKmWeFi8yj%a&hcJMU_VtsT>gv)6;fsF zD)WQwOnMs|mXKt&VL(d2N^{?pO4=WEX@E>Rvs?uA5_pGR*rsN}x&^=U;hvvzngAXO zIt+D?lblCv=Fc%tpA`-O)aviPdjv~fR`~&ER9(ZlsPzp|fB2zf4??Hz`Xp6O?z)L6 
zo0tkx8z>6vRGj(HW`t%NbpS5vCKJU%`bfO)#tA<N%TgV$1BY0J(PeOO|>q;AqGPklfWFQNHQC*n)LA?`^hv?7jLG$ z0dy|4xr>D_p3;4usulcmZEgs=*n~^x9OPq7i+_we_a7k#TTnANPs>R2n+vTr>9qx4 z`dz!QLFz%Wh6}lo)e86I_C|=MtHS2-RrgU^GvFBVbkBaPl?Rq^CTT0zv+Nilw9@S? zx;?6THeL!8h?Z>NkjeJA@8*Mhk3Vj6;MQyfX?Athen%z3uFAq)Lw`cbMi`YzCU51| z60pxvh1%9dCaYV>$T%Pipk(wVb8T4wx;*X%_%nRS5ps2N<8CbEBQH|0e0&!q{Fy+5 zyh$ckmUgFvIRPDY%}h&0v8+hu{YIox3xqT9>OxfzMtVL6$_ADS!h3LVFh>L`3u1n1 z)u=%yu2kmW@d8kOXcX5Wro`%^-AeK*>Noz0R&b@??&R9^=H?V5Eg7;eZ)YIZS{-fx zh?rfFpvQ@x(-`7A-7`s@##z@smHW#pW#N6zAXKg~F~Uk+6#U*q)>YE)|Kjppwoy6#zrwhZ2Kl-A0Migo}ggyHxOuY|>7 z@+Q9@n>bMUC3x^u7o7Bwr0dMCy8gu9Ll8R{J0^8HrF0;X%+|eFDWM?e{|UkfJ@<*@ z)-~}8ji+b5=|+bT;4%YDAY^12n*}$%F#ViTBOPPW%PPnMZMCI@wG|j^6kZc+r zx(b&}<*C}E0@l7$%SU?2NszD#2iVNDr& zi6T=eZ!@MMh-FKa$D>X`;nCSQ5);rbcb+3sQPHV7Zr$OFkY-u)Ik{5+iD_4YIV|4u z-A3}-dG@`>1OO<6bt#StyjpAbB=NzrvPj+^;KabmQEpzbi&H96Tzqtq0wBEkH1SOQ zKthbf`k|g7WNNN~s?c(VwS(F@=HhltRrv=0019mHD3y13M#5pVMoL-2BGJslFdrFm zJ@B5P!468ab$bM~E=mmo1KS5oSphkZY(5MmvKDIU*7$_336%KirdHuAis>8v;9#POYOeN zz`_I=TdnnN*~O$@&{2@k_1J{X6$R>}^bJT$w!mojDWQ}A{sH8}fthee-EQn_P_I}^ z)t?4N1%;&$2K4?&ot{%~mk_WND9lZ1evxuVKRNfnjHNVP1^ZHUia-c3j$`YNV?mZS z?^h$6Srdlf5Ma%q6125_4#7pZ2I##Otg)(fqyh-!n1%X+L9#U*L}40l3pYkNQ5QS; z399dev0%^zB*=$OW6PHn1r8}2YQG)vyhUOy`{sN4(L>AC4?h5k$C@A`PEdW!zuR1x79;vXRVs zRIqkdKQGNfw#&^#m8*n7@*iqaU7AtZ^im{kc)+U>Hev7wzPQek76Ub@US3Ji+KX^# zfi&wf?UPT;15Vw_GUkyna)|=M7@lj!q{!_`v-9;OW6b%IrNL?m-6yp#g#IUrf<2^f zbUxJNuQTRMRtrESUPTv=jEUsE&x#t#`OVxeA%GICOVOYHlrcDEvKv#*qGsoTsgih7 zAeGMQ@v^CKFCqJIT|g0GRf>fiX>Z9jKIxM9`j)e1Q1)w@5FY^ctDeA)n-C+zQRj=@ zd$R2xN6SD|wQ>tSssIrjKfG7BVa?_4qb@ddA${WfMi3);2O<9eOe6trF;K)N%i*XH zU0HsrgEs^2kJU#=x}x}RLL`izSy4N8TFDB|kwgMGk9eF@V`4c1NU(qypwy_`ujEUAr{%2nr_`59Le56o}YxCvp(e6ouZ(ag)J4 z(7^d{&S@@%GO1DS&!mhwSDF0cm-(;7z`0Yz0Z!~~q;?RWnd2jIo4OMGm_ZCHJ#75? 
zX(OpUjDM}a7G)J6Sp#cgx8bic=d$?YB{CEDVD>PA+nn?Z`t;BgOLs07wqUh12%a7I z#g(!Sc%&SA`HvIDg4oTW7>Ta2d?5D9&T2m%kcA-VHGEOo1`b9J8MIFIbLN-hE#l?M zxi|6*5JX8EB>-VN_Pbo|#aRiO={NSEf(#^MBM)N!^XW5hgD$_sdBovJQOKxnPB56Q zA;t5GXOR%TgSLEB;z~add$<*}327H)^Q-8JOb8bJjTw`HpkSaXz*UddWw89gIONqf6ZPj0T)= zIjKn@kzd9p)Iwpo8!aGVYSTGSgyle0{;LrPSm#5k;~lt{UhD< znuI=d(!GC5FV_Hr6xR!CEh$aThSXS&JOe9~QN4m|jxm@9O&hsIy9pr%xCRM%jHY*Qd7xLekq^@t9vq`f_Vr%zK2hIgEe2Ln4*CD01#;0 z7TbpO^$19qw&@}t#q_5~P%rCnY8>n|=ImC_QGbKy7Uw#VLKd<{eH8*!0-sLgL6P2> zHQ5I)iYNo@PJ(8ExxO3sqyh-!nA#*-{{W%NlqnlI@96S)5JZ{-;@#y|9#+@?00%5X z03}*zp=Gjp*TSHCRqMC@im(Ehi&~$yY|bD^2J2HmP?#BN#D3IdTa1A`L7NhNOo03)O3vkJrq)fUgXj=NKZW({Xny$UzK5WsZBNEWR^g4RB5 z1OSW=fNL@{QdUzr3jY9aU+wDjwqmPFuR0FJ0A==ez%y1NSuUL_`(VO3pF-_7&N?}i z!iJv6E_>5xwuix$1pyN=A^OQEWC>|OQT&bH6tKeo05azLQE>f~_21}xgBv;1!-fin z0rxo2nxC;|44$=ZV^`Rr5=2g4N$Df0Pn_w6Vcri6($Dk;)gvR!e(G)KLHne;So{qt zDjTn{6$paVhtJZ;K+ReOWEOv<)SkYsltKgU%mAikhnjY)7M5mDG)PI3KB@9X>l6g+ zi86Pqw%K5)Q6G6h&YJ@ErM(F~Y7m$gsFV%4H?mGp&%bSwO_zso5SEa+2E1DLze_m^7AleJ_A H{^`_ is +one of the most popular search platforms available, and we've made it easy to +use Elasticsearch's vector search capabilities on your computer vision data +directly from FiftyOne! + +Follow these :ref:`simple instructions ` to get started +using Elasticsearch + FiftyOne. + +FiftyOne provides an API to create Elasticsearch indexes, upload vectors, and run +similarity queries, both :ref:`programmatically ` in Python and +via point-and-click in the App. + +.. note:: + + Did you know? You can + :ref:`search by natural language ` using Elasticsearch + similarity indexes! + +.. image:: /images/brain/brain-image-similarity.gif + :alt: image-similarity + :align: center + +.. 
_elasticsearch-basic-recipe: + +Basic recipe +____________ + +The basic workflow to use Elasticsearch to create a similarity index on your FiftyOne +datasets and use this to query your data is as follows: + +1) Connect to or start an Elasticsearch server + +2) Load a :ref:`dataset ` into FiftyOne + +3) Compute embedding vectors for samples or patches in your dataset, or select + a model to use to generate embeddings + +4) Use the :meth:`compute_similarity() ` + method to generate an Elasticsearch similarity index for the samples or object + patches in a dataset by setting the parameter `backend="elasticsearch"` and + specifying a `brain_key` of your choice + +5) Use this Elasticsearch similarity index to query your data with + :meth:`sort_by_similarity() ` + +6) If desired, delete the index + +|br| +The example below demonstrates this workflow. + +.. note:: + + You must have access to + `an Elasticsearch server `_ + and install the + `Elasticsearch Python client `_ + to run this example: + + .. code-block:: shell + + pip install elasticsearch + + Note that, if you are using a custom Elasticsearch server, you can store your + credentials as described in :ref:`this section ` to avoid + entering them manually each time you interact with your Elasticsearch index. + +First let's load a dataset into FiftyOne and compute embeddings for the samples: + +.. code-block:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + # Step 2: Load your data into FiftyOne + dataset = foz.load_zoo_dataset("quickstart") + + # Steps 3 and 4: Compute embeddings and create a similarity index + elasticsearch_index = fob.compute_similarity( + dataset, + brain_key="elasticsearch_index", + backend="elasticsearch", + ) + +Once the similarity index has been generated, we can query our data in FiftyOne +by specifying the `brain_key`: + +.. 
code-block:: python + :linenos: + + # Step 5: Query your data + query = dataset.first().id # query by sample ID + view = dataset.sort_by_similarity( + query, + brain_key="elasticsearch_index", + k=10, # limit to 10 most similar samples + ) + + # Step 6 (optional): Cleanup + + # Delete the Elasticsearch index + elasticsearch_index.cleanup() + + # Delete run record from FiftyOne + dataset.delete_brain_run("elasticsearch_index") + +.. note:: + + Skip to :ref:`this section ` for a variety of common + Elasticsearch query patterns. + +.. _elasticsearch-setup: + +Setup +_____ + +The easiest way to get started with Elasticsearch is to +`install locally via Docker `_. + +Installing the Elasticsearch client +----------------------------------- + +In order to use the Elasticsearch backend, you must also install the +`Elasticsearch Python client `_: + +.. code-block:: shell + + pip install elasticsearch + +Using the Elasticsearch backend +------------------------------- + +By default, calling +:meth:`compute_similarity() ` or +:meth:`sort_by_similarity() ` +will use an sklearn backend. + +To use the Elasticsearch backend, simply set the optional `backend` parameter of +:meth:`compute_similarity() ` to `"elasticsearch"`: + +.. code:: python + :linenos: + + import fiftyone.brain as fob + + fob.compute_similarity(..., backend="elasticsearch", ...) + +Alternatively, you can permanently configure FiftyOne to use the Elasticsearch backend +by setting the following environment variable: + +.. code-block:: shell + + export FIFTYONE_BRAIN_DEFAULT_SIMILARITY_BACKEND=elasticsearch + +or by setting the `default_similarity_backend` parameter of your +:ref:`brain config ` located at `~/.fiftyone/brain_config.json`: + +.. code-block:: json + + { + "default_similarity_backend": "elasticsearch" + } + +Authentication +-------------- + +If you are using a custom Elasticsearch server, you can provide your credentials in a +`variety of ways. 
`_ + +**Environment variables (recommended)** + +The recommended way to configure your Elasticsearch credentials is to store them in +the environment variables shown below, which are automatically accessed by +FiftyOne whenever a connection to Elasticsearch is made. + +.. code-block:: shell + + export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_HOSTS=https://localhost:9200 + export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_USERNAME=XXXXXXXX + export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_PASSWORD=XXXXXXXX + export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_SSL_ASSERT_FINGERPRINT=XXXXXXXX + +This is only one example of variables that can be used to authenticate an +Elasticsearch client. Find more information +`here. `_ + +**FiftyOne Brain config** + +You can also store your credentials in your :ref:`brain config ` +located at `~/.fiftyone/brain_config.json`: + +.. code-block:: json + + { + "similarity_backends": { + "elasticsearch": { + "hosts": "https://localhost:9200", + "username": "XXXXXXXX", + "password": "XXXXXXXX", + "ssl_assert_fingerprint": "XXXXXXXX" + } + } + } + +Note that this file will not exist until you create it. + +**Keyword arguments** + +You can manually provide credentials as keyword arguments each time you call +methods like :meth:`compute_similarity() ` +that require connections to Elasticsearch: + +.. code:: python + :linenos: + + import fiftyone.brain as fob + + elasticsearch_index = fob.compute_similarity( + ... + backend="elasticsearch", + brain_key="elasticsearch_index", + hosts="https://localhost:9200", + username="XXXXXXXX", + password="XXXXXXXX", + ssl_assert_fingerprint="XXXXXXXX", + ) + +Note that, when using this strategy, you must manually provide the credentials +when loading an index later via +:meth:`load_brain_results() `: + +.. 
code:: python + :linenos: + + elasticsearch_index = dataset.load_brain_results( + "elasticsearch_index", + hosts="https://localhost:9200", + username="XXXXXXXX", + password="XXXXXXXX", + ssl_assert_fingerprint="XXXXXXXX", + ) + +.. _elasticsearch-config-parameters: + +Elasticsearch config parameters +------------------------------- + +The Elasticsearch backend supports a variety of query parameters that can be used to +customize your similarity queries. These parameters +include: + +- **index_name** (*None*): the name of the Elasticsearch vector search index to use + or create. If not specified, a new unique name is generated automatically +- **metric** (*"cosine"*): the distance/similarity metric to use when + creating a new index. The supported values are + ``("cosine", "dotproduct", "euclidean", "innerproduct")`` + +For detailed information on these parameters, see the +`Elasticsearch documentation `_. + +You can specify these parameters via any of the strategies described in the +previous section. Here's an example of a :ref:`brain config ` +that includes all of the available parameters: + +.. code-block:: json + + { + "similarity_backends": { + "elasticsearch": { + "index_name": "your-index", + "metric": "cosine" + } + } + } + +However, typically these parameters are directly passed to +:meth:`compute_similarity() ` to configure +a specific new index: + +.. code:: python + :linenos: + + elasticsearch_index = fob.compute_similarity( + ... + backend="elasticsearch", + brain_key="elasticsearch_index", + index_name="your-index", + metric="cosine", + ) + +.. _elasticsearch-managing-brain-runs: + +Managing brain runs +___________________ + +FiftyOne provides a variety of methods that you can use to manage brain runs. + +For example, you can call +:meth:`list_brain_runs() ` +to see the available brain keys on a dataset: + +.. 
code:: python + :linenos: + + import fiftyone.brain as fob + + # List all brain runs + dataset.list_brain_runs() + + # Only list similarity runs + dataset.list_brain_runs(type=fob.Similarity) + + # Only list specific similarity runs + dataset.list_brain_runs( + type=fob.Similarity, + patches_field="ground_truth", + supports_prompts=True, + ) + +Or, you can use +:meth:`get_brain_info() ` +to retrieve information about the configuration of a brain run: + +.. code:: python + :linenos: + + info = dataset.get_brain_info(brain_key) + print(info) + +Use :meth:`load_brain_results() ` +to load the |SimilarityIndex| instance for a brain run. + +You can use +:meth:`rename_brain_run() ` +to rename the brain key associated with an existing similarity results run: + +.. code:: python + :linenos: + + dataset.rename_brain_run(brain_key, new_brain_key) + +Finally, you can use +:meth:`delete_brain_run() ` +to delete the record of a similarity index computation from your FiftyOne +dataset: + +.. code:: python + :linenos: + + dataset.delete_brain_run(brain_key) + +.. note:: + + Calling + :meth:`delete_brain_run() ` + only deletes the **record** of the brain run from your FiftyOne dataset; it + will not delete any associated Elasticsearch index, which you can do as + follows: + + .. code:: python + + # Delete the Elasticsearch index + elasticsearch_index = dataset.load_brain_results(brain_key) + elasticsearch_index.cleanup() + +.. _elasticsearch-examples: + +Examples +________ + +This section demonstrates how to perform some common vector search workflows on +a FiftyOne dataset using the Elasticsearch backend. + +.. note:: + + All of the examples below assume you have configured your Elasticsearch server + as described in :ref:`this section `. + +.. 
_elasticsearch-new-similarity-index: + +Create a similarity index +------------------------- + +In order to create a new Elasticsearch similarity index, you need to specify +either the `embeddings` or `model` argument to +:meth:`compute_similarity() `. Here are a few +possibilities: + +.. code:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + model_name = "clip-vit-base32-torch" + model = foz.load_zoo_model(model_name) + brain_key = "elasticsearch_index" + + # Option 1: Compute embeddings on the fly from model name + fob.compute_similarity( + dataset, + model=model_name, + backend="elasticsearch", + brain_key=brain_key, + ) + + # Option 2: Compute embeddings on the fly from model instance + fob.compute_similarity( + dataset, + model=model, + backend="elasticsearch", + brain_key=brain_key, + ) + + # Option 3: Pass precomputed embeddings as a numpy array + embeddings = dataset.compute_embeddings(model) + fob.compute_similarity( + dataset, + embeddings=embeddings, + backend="elasticsearch", + brain_key=brain_key, + ) + + # Option 4: Pass precomputed embeddings by field name + dataset.compute_embeddings(model, embeddings_field="embeddings") + fob.compute_similarity( + dataset, + embeddings="embeddings", + backend="elasticsearch", + brain_key=brain_key, + ) + +.. _elasticsearch-patch-similarity-index: + +Create a patch similarity index +------------------------------- + +You can also create a similarity index for +:ref:`object patches ` within your dataset by +including the `patches_field` argument to +:meth:`compute_similarity() `: + +.. 
code:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + fob.compute_similarity( + dataset, + patches_field="ground_truth", + model="clip-vit-base32-torch", + backend="elasticsearch", + brain_key="elasticsearch_patches", + ) + +.. _elasticsearch-connect-to-existing-index: + +Connect to an existing index +---------------------------- + +If you have already created an Elasticsearch index storing the embedding vectors +for the samples or patches in your dataset, you can connect to it by passing +the `index_name` to +:meth:`compute_similarity() `: + +.. code:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", # zoo model used (if applicable) + embeddings=False, # don't compute embeddings + index_name="your-index", # the existing Elasticsearch index + brain_key="elasticsearch_index", + backend="elasticsearch", + ) + +.. _elasticsearch-add-remove-embeddings: + +Add/remove embeddings from an index +----------------------------------- + +You can use +:meth:`add_to_index() ` +and +:meth:`remove_from_index() ` +to add and remove embeddings from an existing Elasticsearch index. + +These methods can come in handy if you modify your FiftyOne dataset and need +to update the Elasticsearch index to reflect these changes: + +.. 
code:: python + :linenos: + + import numpy as np + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + elasticsearch_index = fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", + brain_key="elasticsearch_index", + backend="elasticsearch", + ) + print(elasticsearch_index.total_index_size) # 200 + + view = dataset.take(10) + ids = view.values("id") + + # Delete 10 samples from a dataset + dataset.delete_samples(view) + + # Delete the corresponding vectors from the index + elasticsearch_index.remove_from_index(sample_ids=ids) + + # Add 20 samples to a dataset + samples = [fo.Sample(filepath="tmp%d.jpg" % i) for i in range(20)] + sample_ids = dataset.add_samples(samples) + + # Add corresponding embeddings to the index + embeddings = np.random.rand(20, 512) + elasticsearch_index.add_to_index(embeddings, sample_ids) + + print(elasticsearch_index.total_index_size) # 210 + +.. _elasticsearch-get-embeddings: + +Retrieve embeddings from an index +--------------------------------- + +You can use +:meth:`get_embeddings() ` +to retrieve embeddings from an Elasticsearch index by ID: + +.. code:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + elasticsearch_index = fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", + brain_key="elasticsearch_index", + backend="elasticsearch", + ) + + # Retrieve embeddings for the entire dataset + ids = dataset.values("id") + embeddings, sample_ids, _ = elasticsearch_index.get_embeddings(sample_ids=ids) + print(embeddings.shape) # (200, 512) + print(sample_ids.shape) # (200,) + + # Retrieve embeddings for a view + ids = dataset.take(10).values("id") + embeddings, sample_ids, _ = elasticsearch_index.get_embeddings(sample_ids=ids) + print(embeddings.shape) # (10, 512) + print(sample_ids.shape) # (10,) + +.. 
_elasticsearch-query: + +Querying an Elasticsearch index +------------------------------- + +You can query an Elasticsearch index by appending a +:meth:`sort_by_similarity() ` +stage to any dataset or view. The query can be any of the following: + +* An ID (sample or patch) +* A query vector of the same dimension as the index +* A list of IDs (samples or patches) +* A text prompt (if :ref:`supported by the model `) + +.. code:: python + :linenos: + + import numpy as np + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", + brain_key="elasticsearch_index", + backend="elasticsearch", + ) + + # Query by vector + query = np.random.rand(512) # matches the dimension of CLIP embeddings + view = dataset.sort_by_similarity(query, k=10, brain_key="elasticsearch_index") + + # Query by sample ID + query = dataset.first().id + view = dataset.sort_by_similarity(query, k=10, brain_key="elasticsearch_index") + + # Query by a list of IDs + query = [dataset.first().id, dataset.last().id] + view = dataset.sort_by_similarity(query, k=10, brain_key="elasticsearch_index") + + # Query by text prompt + query = "a photo of a dog" + view = dataset.sort_by_similarity(query, k=10, brain_key="elasticsearch_index") + +.. note:: + + Performing a similarity search on a |DatasetView| will **only** return + results from the view; if the view contains samples that were not included + in the index, they will never be included in the result. + + This means that you can index an entire |Dataset| once and then perform + searches on subsets of the dataset by + :ref:`constructing views ` that contain the images of + interest. + +.. 
_elasticsearch-access-client: + +Accessing the Elasticsearch client +---------------------------------- + +You can use the `client` property of a Elasticsearch index to directly access the +underlying Elasticsearch client instance and use its methods as desired: + +.. code:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + elasticsearch_index = fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", + brain_key="elasticsearch_index", + backend="elasticsearch", + ) + + elasticsearch_client = elasticsearch_index.client + print(elasticsearch_client) + +.. _elasticsearch-advanced-usage: + +Advanced usage +-------------- + +As :ref:`previously mentioned `, you can customize +your Elasticsearch indexes by providing optional parameters to +:meth:`compute_similarity() `. + +Here's an example of creating a similarity index backed by a customized Elasticsearch +index. Just for fun, we'll specify a custom index name, use dot +product similarity, and populate the index for only a subset of our dataset: + +.. 
code:: python + :linenos: + + import fiftyone as fo + import fiftyone.brain as fob + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart") + + # Create a custom Elasticsearch index + elasticsearch_index = fob.compute_similarity( + dataset, + model="clip-vit-base32-torch", + embeddings=False, # we'll add embeddings below + metric="dotproduct", + brain_key="elasticsearch_index", + backend="elasticsearch", + index_name="custom-quickstart-index", + ) + + # Add embeddings for a subset of the dataset + view = dataset.take(10) + embeddings, sample_ids, _ = elasticsearch_index.compute_embeddings(view) + elasticsearch_index.add_to_index(embeddings, sample_ids) + + elasticsearch_client = elasticsearch_index.client diff --git a/docs/source/integrations/index.rst b/docs/source/integrations/index.rst index 281855a627..e5204c7d2b 100644 --- a/docs/source/integrations/index.rst +++ b/docs/source/integrations/index.rst @@ -125,6 +125,13 @@ on the cards below to see how! :image: ../_static/images/integrations/lancedb.png :tags: Brain,Embeddings,Vector-Search +.. customcarditem:: + :header: Elasticsearch + :description: Use our Elasticsearch integration to enable vector search and query your FiftyOne datasets at scale. + :link: elasticsearch.html + :image: ../_static/images/integrations/elasticsearch.jpg + :tags: Brain,Embeddings,Vector-Search + .. customcarditem:: :header: Hugging Face :description: Use our Hugging Face Transformers integration to run inference on your FiftyOne datasets with just a few lines of code. @@ -205,6 +212,7 @@ on the cards below to see how! 
MongoDB Milvus LanceDB + Elasticsearch Hugging Face Ultralytics Albumentations diff --git a/docs/source/user_guide/brain.rst b/docs/source/user_guide/brain.rst index 95c4de9260..2342c1575b 100644 --- a/docs/source/user_guide/brain.rst +++ b/docs/source/user_guide/brain.rst @@ -431,6 +431,7 @@ another supported backend: - **mongodb**: a :ref:`MongoDB backend ` - **milvus**: a :ref:`Milvus backend ` - **lancedb**: a :ref:`LanceDB backend ` +- **elasticsearch**: a :ref:`Elasticsearch backend ` .. code-block:: python :linenos: From f345e5e36e72fe640cb97e797b4b15f10faa02c4 Mon Sep 17 00:00:00 2001 From: brimoor Date: Thu, 8 Aug 2024 22:27:11 -0400 Subject: [PATCH 2/2] updates --- docs/source/index.rst | 5 ++ docs/source/integrations/elasticsearch.rst | 99 ++++++++++------------ docs/source/integrations/index.rst | 16 ++-- docs/source/user_guide/brain.rst | 12 ++- 4 files changed, 69 insertions(+), 63 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index b8f1e3305f..afed9fa3b7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -121,6 +121,11 @@ learn how: :image_src: https://voxel51.com/images/integrations/mongodb-128.png :image_title: MongoDB +.. customimagelink:: + :image_link: integrations/elasticsearch.html + :image_src: https://voxel51.com/images/integrations/elasticsearch-128.png + :image_title: Elasticsearch + .. customimagelink:: :image_link: integrations/milvus.html :image_src: https://voxel51.com/images/integrations/milvus-128.png diff --git a/docs/source/integrations/elasticsearch.rst b/docs/source/integrations/elasticsearch.rst index cd5e516249..1439fcca72 100644 --- a/docs/source/integrations/elasticsearch.rst +++ b/docs/source/integrations/elasticsearch.rst @@ -1,7 +1,7 @@ .. _elasticsearch-integration: -Elasticsearch Integration -========================= +Elasticsearch Vector Search Integration +======================================= .. default-role:: code @@ -13,15 +13,15 @@ directly from FiftyOne! 
Follow these :ref:`simple instructions ` to get started using Elasticsearch + FiftyOne. -FiftyOne provides an API to create Elasticsearch indexes, upload vectors, and run -similarity queries, both :ref:`programmatically ` in Python and -via point-and-click in the App. +FiftyOne provides an API to create Elasticsearch indexes, upload vectors, and +run similarity queries, both :ref:`programmatically ` in +Python and via point-and-click in the App. .. note:: Did you know? You can - :ref:`search by natural language ` using Elasticsearch - similarity indexes! + :ref:`search by natural language ` using + Elasticsearch similarity indexes! .. image:: /images/brain/brain-image-similarity.gif :alt: image-similarity @@ -32,8 +32,8 @@ via point-and-click in the App. Basic recipe ____________ -The basic workflow to use Elasticsearch to create a similarity index on your FiftyOne -datasets and use this to query your data is as follows: +The basic workflow to use Elasticsearch to create a similarity index on your +FiftyOne datasets and use this to query your data is as follows: 1) Connect to or start an Elasticsearch server @@ -43,9 +43,9 @@ datasets and use this to query your data is as follows: a model to use to generate embeddings 4) Use the :meth:`compute_similarity() ` - method to generate a Elasticsearch similarity index for the samples or object - patches in a dataset by setting the parameter `backend="elasticsearch"` and - specifying a `brain_key` of your choice + method to generate an Elasticsearch similarity index for the samples or + object patches in a dataset by setting the parameter + `backend="elasticsearch"` and specifying a `brain_key` of your choice 5) Use this Elasticsearch similarity index to query your data with :meth:`sort_by_similarity() ` @@ -67,9 +67,10 @@ The example below demonstrates this workflow.
pip install elasticsearch - Note that, if you are using a custom Elasticsearch server, you can store your - credentials as described in :ref:`this section ` to avoid - entering them manually each time you interact with your Elasticsearch index. + Note that, if you are using a custom Elasticsearch server, you can store + your credentials as described in :ref:`this section ` + to avoid entering them manually each time you interact with your + Elasticsearch index. First let's load a dataset into FiftyOne and compute embeddings for the samples: @@ -114,8 +115,8 @@ by specifying the `brain_key`: .. note:: - Skip to :ref:`this section ` for a variety of common - Elasticsearch query patterns. + Skip to :ref:`this section ` for a variety of + common Elasticsearch query patterns. .. _elasticsearch-setup: @@ -136,7 +137,7 @@ In order to use the Elasticsearch backend, you must also install the pip install elasticsearch Using the Elasticsearch backend ------------------------- +------------------------------- By default, calling :meth:`compute_similarity() ` or @@ -144,7 +145,8 @@ By default, calling will use an sklearn backend. To use the Elasticsearch backend, simply set the optional `backend` parameter of -:meth:`compute_similarity() ` to `"elasticsearch"`: +:meth:`compute_similarity() ` to +`"elasticsearch"`: .. code:: python :linenos: @@ -153,8 +155,8 @@ To use the Elasticsearch backend, simply set the optional `backend` parameter of fob.compute_similarity(..., backend="elasticsearch", ...) -Alternatively, you can permanently configure FiftyOne to use the Elasticsearch backend -by setting the following environment variable: +Alternatively, you can permanently configure FiftyOne to use the Elasticsearch +backend by setting the following environment variable: .. 
code-block:: shell @@ -172,21 +174,21 @@ or by setting the `default_similarity_backend` parameter of your Authentication -------------- -If you are using a custom Elasticsearch server, you can provide your credentials in a -`variety of ways. `_ +If you are using a custom Elasticsearch server, you can provide your +credentials in a +`variety of ways `_. **Environment variables (recommended)** -The recommended way to configure your Elasticsearch credentials is to store them in -the environment variables shown below, which are automatically accessed by -FiftyOne whenever a connection to Elasticsearch is made. +The recommended way to configure your Elasticsearch credentials is to store +them in the environment variables shown below, which are automatically accessed +by FiftyOne whenever a connection to Elasticsearch is made. .. code-block:: shell - export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_HOSTS=https://localhost:9200 + export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_HOSTS=http://localhost:9200 export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_USERNAME=XXXXXXXX export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_PASSWORD=XXXXXXXX - export FIFTYONE_BRAIN_SIMILARITY_ELASTICSEARCH_SSL_ASSERT_FINGERPRINT=XXXXXXXX This is only one example of variables that can be used to authenticate an Elasticsearch client. Find more information @@ -202,10 +204,9 @@ located at `~/.fiftyone/brain_config.json`: { "similarity_backends": { "elasticsearch": { - "hosts": "https://localhost:9200", + "hosts": "http://localhost:9200", "username": "XXXXXXXX", - "password": "XXXXXXXX", - "ssl_assert_fingerprint": "XXXXXXXX", + "password": "XXXXXXXX" } } } @@ -227,10 +228,9 @@ that require connections to Elasticsearch: ... 
backend="elasticsearch", brain_key="elasticsearch_index", - hosts="https://localhost:9200", + hosts="http://localhost:9200", username="XXXXXXXX", password="XXXXXXXX", - ssl_assert_fingerprint="XXXXXXXX", ) Note that, when using this strategy, you must manually provide the credentials @@ -242,11 +242,9 @@ when loading an index later via elasticsearch_index = dataset.load_brain_results( "elasticsearch_index", - url="http://localhost:6333", - hosts="https://localhost:9200", + hosts="http://localhost:9200", username="XXXXXXXX", password="XXXXXXXX", - ssl_assert_fingerprint="XXXXXXXX", ) .. _elasticsearch-config-parameters: @@ -254,12 +252,11 @@ when loading an index later via Elasticsearch config parameters ------------------------------- -The Elasticsearch backend supports a variety of query parameters that can be used to -customize your similarity queries. These parameters broadly fall into four -categories: +The Elasticsearch backend supports a variety of query parameters that can be +used to customize your similarity queries. These parameters include: -- **index_name** (*None*): the name of the Elasticsearch vector search index to use - or create. If not specified, a new unique name is generated automatically +- **index_name** (*None*): the name of the Elasticsearch vector search index + to use or create. If not specified, a new unique name is generated automatically - **metric** (*"cosine"*): the distance/similarity metric to use when creating a new index. The supported values are ``("cosine", "dotproduct", "euclidean", "innerproduct")`` @@ -382,8 +379,8 @@ a FiftyOne dataset using the Elasticsearch backend. .. note:: - All of the examples below assume you have configured your Elasticsearch server - as described in :ref:`this section `. + All of the examples below assume you have configured your Elasticsearch + server as described in :ref:`this section `. .. 
_elasticsearch-new-similarity-index: @@ -588,7 +585,7 @@ to retrieve embeddings from a Elasticsearch index by ID: .. _elasticsearch-query: Querying a Elasticsearch index -------------------------------- +------------------------------ You can query a Elasticsearch index by appending a :meth:`sort_by_similarity() ` @@ -649,8 +646,8 @@ stage to any dataset or view. The query can be any of the following: Accessing the Elasticsearch client ---------------------------------- -You can use the `client` property of a Elasticsearch index to directly access the -underlying Elasticsearch client instance and use its methods as desired: +You can use the `client` property of an Elasticsearch index to directly access +the underlying Elasticsearch client instance and use its methods as desired: .. code:: python :linenos: @@ -676,12 +673,12 @@ underlying Elasticsearch client instance and use its methods as desired: Advanced usage -------------- -As :ref:`previously mentioned `, you can customize -your Elasticsearch indexes by providing optional parameters to +As :ref:`previously mentioned `, you can +customize your Elasticsearch indexes by providing optional parameters to :meth:`compute_similarity() `. -Here's an example of creating a similarity index backed by a customized Elasticsearch -index. Just for fun, we'll specify a custom index name, use dot +Here's an example of creating a similarity index backed by a customized +Elasticsearch index. Just for fun, we'll specify a custom index name, use dot product similarity, and populate the index for only a subset of our dataset: ..
code:: python @@ -708,5 +705,3 @@ product similarity, and populate the index for only a subset of our dataset: view = dataset.take(10) embeddings, sample_ids, _ = elasticsearch_index.compute_embeddings(view) elasticsearch_index.add_to_index(embeddings, sample_ids) - - elasticsearch_client = elasticsearch_index.client diff --git a/docs/source/integrations/index.rst b/docs/source/integrations/index.rst index e5204c7d2b..8bdc1ece6b 100644 --- a/docs/source/integrations/index.rst +++ b/docs/source/integrations/index.rst @@ -111,6 +111,13 @@ on the cards below to see how! :image: ../_static/images/integrations/mongodb.jpg :tags: Brain,Embeddings,Vector-Search +.. customcarditem:: + :header: Elasticsearch + :description: Use our Elasticsearch integration to enable vector search and query your FiftyOne datasets at scale. + :link: elasticsearch.html + :image: ../_static/images/integrations/elasticsearch.jpg + :tags: Brain,Embeddings,Vector-Search + .. customcarditem:: :header: Milvus :description: Use our Milvus integration to index your FiftyOne datasets and perform embeddings queries at scale. @@ -125,13 +132,6 @@ on the cards below to see how! :image: ../_static/images/integrations/lancedb.png :tags: Brain,Embeddings,Vector-Search -.. customcarditem:: - :header: Elasticsearch - :description: Use our Elasticsearch integration to enable vector search and query your FiftyOne datasets at scale. - :link: elasticsearch.html - :image: ../_static/images/integrations/elasticsearch.jpg - :tags: Brain,Embeddings,Vector-Search - .. customcarditem:: :header: Hugging Face :description: Use our Hugging Face Transformers integration to run inference on your FiftyOne datasets with just a few lines of code. @@ -210,9 +210,9 @@ on the cards below to see how! 
Redis Pinecone MongoDB + Elasticsearch Milvus LanceDB - Elasticsearch Hugging Face Ultralytics Albumentations diff --git a/docs/source/user_guide/brain.rst b/docs/source/user_guide/brain.rst index 2342c1575b..0eacb93e1b 100644 --- a/docs/source/user_guide/brain.rst +++ b/docs/source/user_guide/brain.rst @@ -423,15 +423,14 @@ optional `backend` parameter to :meth:`compute_similarity() ` to switch to another supported backend: -- **sklearn** (*default*): a `scikit-learn `_ - backend +- **sklearn** (*default*): a `scikit-learn `_ backend - **qdrant**: a :ref:`Qdrant backend ` - **redis**: a :ref:`Redis backend ` - **pinecone**: a :ref:`Pinecone backend ` - **mongodb**: a :ref:`MongoDB backend ` +- **elasticsearch**: an :ref:`Elasticsearch backend ` - **milvus**: a :ref:`Milvus backend ` - **lancedb**: a :ref:`LanceDB backend ` -- **elasticsearch**: a :ref:`Elasticsearch backend ` .. code-block:: python :linenos: @@ -831,6 +830,7 @@ The relevant classes for the builtin similarity backends are: - **redis**: :class:`fiftyone.brain.internal.core.redis.RedisSimilarityConfig` - **pinecone**: :class:`fiftyone.brain.internal.core.pinecone.PineconeSimilarityConfig` - **mongodb**: :class:`fiftyone.brain.internal.core.mongodb.MongoDBSimilarityConfig` +- **elasticsearch**: :class:`fiftyone.brain.internal.core.elasticsearch.ElasticsearchSimilarityConfig` - **milvus**: :class:`fiftyone.brain.internal.core.milvus.MilvusSimilarityConfig` - **lancedb**: :class:`fiftyone.brain.internal.core.lancedb.LanceDBSimilarityConfig` @@ -1904,6 +1904,9 @@ and the CLI: "mongodb": { "config_cls": "fiftyone.brain.internal.core.mongodb.MongoDBSimilarityConfig" }, + "elasticsearch": { + "config_cls": "fiftyone.brain.internal.core.elasticsearch.ElasticsearchSimilarityConfig" + }, "lancedb": { "config_cls": "fiftyone.brain.internal.core.lancedb.LanceDBSimilarityConfig" } @@ -1955,6 +1958,9 @@ and the CLI: "mongodb": { "config_cls": "fiftyone.brain.internal.core.mongodb.MongoDBSimilarityConfig" }, +
"elasticsearch": { + "config_cls": "fiftyone.brain.internal.core.elasticsearch.ElasticsearchSimilarityConfig" + }, "lancedb": { "config_cls": "fiftyone.brain.internal.core.lancedb.LanceDBSimilarityConfig" }