From 2f88e6ac60c7b4e024b456f785c05f1fc401ec01 Mon Sep 17 00:00:00 2001
From: user <571680391@qq.com>
Date: Mon, 17 Jul 2023 22:40:42 +0800
Subject: [PATCH] first commit

---
 .gitignore | 15 +
 LICENSE | 21 +
 assets/main.png | Bin 0 -> 186007 bytes
 data_utils/deepspeech_features/README.md | 20 +
 .../deepspeech_features.py | 275 ++
 .../deepspeech_features/deepspeech_store.py | 172 ++
 .../extract_ds_features.py | 132 +
 data_utils/deepspeech_features/extract_wav.py | 87 +
 data_utils/deepspeech_features/fea_win.py | 11 +
 data_utils/face_parsing/logger.py | 23 +
 data_utils/face_parsing/model.py | 285 +++
 data_utils/face_parsing/resnet.py | 109 +
 data_utils/face_parsing/test.py | 98 +
 data_utils/face_tracking/__init__.py | 0
 data_utils/face_tracking/convert_BFM.py | 39 +
 data_utils/face_tracking/data_loader.py | 16 +
 data_utils/face_tracking/face_tracker.py | 390 +++
 data_utils/face_tracking/facemodel.py | 153 ++
 data_utils/face_tracking/geo_transform.py | 69 +
 data_utils/face_tracking/render_3dmm.py | 202 ++
 data_utils/face_tracking/render_land.py | 192 ++
 data_utils/face_tracking/util.py | 109 +
 data_utils/process.py | 444 ++++
 encoding.py | 38 +
 freqencoder/__init__.py | 1 +
 freqencoder/backend.py | 41 +
 freqencoder/freq.py | 77 +
 freqencoder/setup.py | 51 +
 freqencoder/src/bindings.cpp | 8 +
 freqencoder/src/freqencoder.cu | 129 +
 freqencoder/src/freqencoder.h | 10 +
 gridencoder/__init__.py | 1 +
 gridencoder/backend.py | 40 +
 gridencoder/grid.py | 155 ++
 gridencoder/setup.py | 50 +
 gridencoder/src/bindings.cpp | 8 +
 gridencoder/src/gridencoder.cu | 479 ++++
 gridencoder/src/gridencoder.h | 15 +
 main.py | 260 ++
 nerf_triplane/asr.py | 419 +++
 nerf_triplane/gui.py | 565 +++++
 nerf_triplane/network.py | 352 +++
 nerf_triplane/provider.py | 764 ++++++
 nerf_triplane/renderer.py | 700 +++++
 nerf_triplane/utils.py | 1514 +++++++++++
 raymarching/__init__.py | 1 +
 raymarching/backend.py | 40 +
 raymarching/raymarching.py | 671 +++++
 raymarching/setup.py | 63 +
 raymarching/src/bindings.cpp | 39 +
 raymarching/src/raymarching.cu | 2258 +++++++++++++++++
 raymarching/src/raymarching.h | 38 +
 readme.md | 140 +
 requirements.txt | 26 +
 scripts/train_obama.sh | 5 +
 shencoder/__init__.py | 1 +
 shencoder/backend.py | 40 +
 shencoder/setup.py | 50 +
 shencoder/sphere_harmonics.py | 87 +
 shencoder/src/bindings.cpp | 8 +
 shencoder/src/shencoder.cu | 439 ++++
 shencoder/src/shencoder.h | 10 +
 62 files changed, 12455 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 assets/main.png
 create mode 100644 data_utils/deepspeech_features/README.md
 create mode 100644 data_utils/deepspeech_features/deepspeech_features.py
 create mode 100644 data_utils/deepspeech_features/deepspeech_store.py
 create mode 100644 data_utils/deepspeech_features/extract_ds_features.py
 create mode 100644 data_utils/deepspeech_features/extract_wav.py
 create mode 100644 data_utils/deepspeech_features/fea_win.py
 create mode 100644 data_utils/face_parsing/logger.py
 create mode 100644 data_utils/face_parsing/model.py
 create mode 100644 data_utils/face_parsing/resnet.py
 create mode 100644 data_utils/face_parsing/test.py
 create mode 100644 data_utils/face_tracking/__init__.py
 create mode 100644 data_utils/face_tracking/convert_BFM.py
 create mode 100644 data_utils/face_tracking/data_loader.py
 create mode 100644 data_utils/face_tracking/face_tracker.py
 create mode 100644 data_utils/face_tracking/facemodel.py
 create mode 100644 data_utils/face_tracking/geo_transform.py
 create mode 100644 data_utils/face_tracking/render_3dmm.py
 create mode 100644 data_utils/face_tracking/render_land.py
 create mode 100644 data_utils/face_tracking/util.py
 create mode 100644 data_utils/process.py
 create mode 100644 encoding.py
 create mode 100644 freqencoder/__init__.py
 create mode 100644 freqencoder/backend.py
 create mode 100644 freqencoder/freq.py
 create mode 100644 freqencoder/setup.py
 create mode 100644 freqencoder/src/bindings.cpp
 create mode 100644 freqencoder/src/freqencoder.cu
 create mode 100644 freqencoder/src/freqencoder.h
 create mode 100644 gridencoder/__init__.py
 create mode 100644 gridencoder/backend.py
 create mode 100644 gridencoder/grid.py
 create mode 100644 gridencoder/setup.py
 create mode 100644 gridencoder/src/bindings.cpp
 create mode 100644 gridencoder/src/gridencoder.cu
 create mode 100644 gridencoder/src/gridencoder.h
 create mode 100644 main.py
 create mode 100644 nerf_triplane/asr.py
 create mode 100644 nerf_triplane/gui.py
 create mode 100644 nerf_triplane/network.py
 create mode 100644 nerf_triplane/provider.py
 create mode 100644 nerf_triplane/renderer.py
 create mode 100644 nerf_triplane/utils.py
 create mode 100644 raymarching/__init__.py
 create mode 100644 raymarching/backend.py
 create mode 100644 raymarching/raymarching.py
 create mode 100644 raymarching/setup.py
 create mode 100644 raymarching/src/bindings.cpp
 create mode 100644 raymarching/src/raymarching.cu
 create mode 100644 raymarching/src/raymarching.h
 create mode 100644 readme.md
 create mode 100644 requirements.txt
 create mode 100644 scripts/train_obama.sh
 create mode 100644 shencoder/__init__.py
 create mode 100644 shencoder/backend.py
 create mode 100644 shencoder/setup.py
 create mode 100644 shencoder/sphere_harmonics.py
 create mode 100644 shencoder/src/bindings.cpp
 create mode 100644 shencoder/src/shencoder.cu
 create mode 100644 shencoder/src/shencoder.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7419e4b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+__pycache__/
+build/
+*.egg-info/
+*.so
+*.mp4
+
+tmp*
+trial*/
+
+data
+data_utils/face_tracking/3DMM/*
+data_utils/face_parsing/79999_iter.pth
+
+pretrained
+*.mp4
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f565fb7
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 hawkey
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/assets/main.png b/assets/main.png
new file mode 100644
index 0000000000000000000000000000000000000000..00a637547ed9d0728ae2a0fcc938a6dd544178a5
GIT binary patch
literal 186007
[base85-encoded binary image data for assets/main.png omitted]
zF)u_`=UoI#M#B`>x<8iT3KY&f`O1@iv><9hA~c+@6osXE>!4~90^0I?wh-=q2@tb)cM#BP-j$s}Nv4_G|#v$%oaFF(zgmiz;A?u;*zt@W;F zeDTiVPor}$OpKw%%ORaNOLW3+9NbfkV+jE#AEeWRraj{#Z=n2@U?x-$yucW`+PqrV#*W~z5oPoC8RDc?R>OA>p+d5KDxma@t8j-pt&zoN3>bO zWMqAXQu?z3DjbfkP(6fq>t|h>s33X&`#Q^~`IjRLL;YD<0+;?*QP}Tq4(Prthm$#f zBmaj#!boRU-Xm++s135R*tbE!er+V&EzU455k+Q&KvR zU3-avoZ1B&<+G`+Z1_vY(pSCld7ZxC%YwimibUS$6A*L4s{Ku}x6{u6^AdIvHynuz zjUv*7bx$p-(2Pr3HbnB>SA^NoC1U{jCLs`b-lAC~qNKz}K7Jm1Mleq^6!mr?nReOV zpKr6D=Z2+G4gUkbfh;xDE!f%Wn+({5U67b=0pt2R6lGOQrQz2IeEgRb6{rPXhuvdM z3fC2IR0#U9ZLxhT%lFYcNOwF)#AwVjb(D2rCH=`*Oq3&>JDk2Rb{#?@VPt-Q<0lDv z1lAvVZ<%cbO4_y)^u5%fle$&cg}kX*bl(7iSzaPsU>N76(RCzG?%fRp5}|g{t`YGe^Crcs zClLr0?LO;-ZT%)?9=@NiXTXzoSlQA|lT5aD>YpbX@U%jk1{+9*ztNx@lG*hctfO$; zOhYNiwXb^X=o%y4)JP9jm?MOKRoR00h3S^c%X^^3H1&C)d2wEt|nz-S=s#p5d6v5G2J z-aTPhm>Ry8PALZ0VA$Q4yBA9MB+^}N5X9)9pgOY0`JN5 z_tN>7HFGH!P8_|VKFw4hYa;#Smm32oki7?AH7d&kB5xYz`(RnzV^J{c2QC9?PFon9 zhvtT2m1>%lc<3?y1uL8-T} z;=G(T2Imx|-~X!x=!*}QkJn@*Qhn@z)UMlA3^ZaWc$|(Xr)& zFnD=8+-mK7)z^EC)S{?sp?(oOFgX+073A1OnAF}NNB6yRfEN4VD?nj3n_CuaOesvG zXgJ{qG`7VC?bpNd?@ITn89(ItEZW|5iwbOdalFwzw71kNS_HniUG(F8Q1Nq6(_Zlz zULdyDzeWYz9eB(}j*{%ahF;b^;bS~?YXSmz>L*SxMd^Zs_Ly)Tlcte!E}me2*zG3D|T-QEbpbEev;_ z1>8x2G#B+YtL=Tb`M;}-AK?x-$Uuu#*|5U4{;Y|(wJ-ESQ$@B1#h?$QUu@GdWad5K zyPk81V{0u6(SI9%>|>pVXu3!}HMBX0Nq1k#Fi+oeS4LYr5fu-R9iwyEkN}AE8x6PGT$~Z&yN3dJX%~i`K`YE% ztmdo@2CF`tZq7-Cj50z}Ju=BO%b6qyVbYMM5D*q^kP;2Ga|I90PW)*Dvt2DP6 zAdp^ey426LD2wtv7Ds_)W#5Aoe9Yqn3c0EqUxO zmcyDFp*7Tas5{oT5+Ci*e9KuqGAi0^4q)OvQi}TSz_8#HM8;lw3&7CB&OP=Ys#e#{ zfB6G9{z&RWOj2U(12UpwV4j zcI$+G>vavdn&}0g13dR{Cxg}me&&DU&U$gFPVf>=LaCN zlO?h?9IAN}qj=H0o}0C-@0q4N4g#4BQtfCew%4%Mm>717Qh8d*)ZChu^MTivL6#Kn zr@;U{dfxOpCfJ$>VY5kdW3av)_On0beRJwBhcH8U@gbfG&RUY|8#wZ}f+sixV~>>6 zDoU1yq4WO_NAr)SAhac)?%u$Q^I`KQmz6*`;>9YQJGxv?>o8BxuAazPGh9y&p z<`Wq#gGt6KTFO`nBifxMmnT!V{pED6LOnrw`8&SKfg_y>u(1+Mo$g&4QEg}3yWvk#gmS10)RsfGA769f(yKeNP z#YP}WB00QMQklWoQbVlTdNS0YF38d%IJ4(@Shx1jaP1U3fhD(GY{ z-3b+;IH(?II+o{qF7Gz)vFuabc!(XaEO{v-4S6ZD7BGaVKYJzpT?9N(l&Yp)QZEK6 z=ehHqyoW_F|6^|Cc-(Fk|H-EQr#k&BP>uxZZ_83|(*_@eZAX7Qp+ll7RqlMUQEo7 zz?J`&?vjm4M#yiI7D6`2zsK&D=u3853tP%HAydx)6{r2C+Oyyfq^)xqZIXM>8tu#4 znlne0RduAgurKDOWG+F#a4Q{Y5!aKJ2;XHzKwND2Guf|;ui-NUqaL$&jbOlw^O`!e zgM~anP;O%@#%{B%!@UNBpLSYZTPr;v4gm<>>uc;m6zZcm7#0`FLW8zqCNHu<;I--$ zxdcrpRR*Ukd|2?3GvuSZ8>kEi`@?O)D>J_f>lDE|dqmoYE zuFYz9-wK3_n}u>L?tTv>Pb8Xs57?_vD?*Ec9{UVrz5rlQTcPFO;}N|64Jt|Cu{gjQ z9u$E?T&N1y-#^>&dLNa914F``W;q?fcX)~X z;}PeUQGvVuvdp}p<~`fkUyA&1dT51xIT9Ddq+!Qo!@FWO0i+h<^b)jMP1Gl?prST? zK2u!S+#zuvxc-s%Wy9}~ce>pAuHfhP9k0BP3Vr8bYTSgX{xo>bX#~pdlk)65Nxrh@ z#!A5$V08N!vXw-N)LHz}zk?o2a-~46h{#YkT=*#!Zuuu7u!YigPdsFK6Ottw8*&_m z!BBdORP4O`?_);~p3(>Si~NhHx;kO$97D4eF1n$zZD?o_`F&~X$@%8+a2_(zb0kI+ zdPs)J;Mwj?cAGEdr#DBO1(U*jG+>Q=sG82ilJv4ZCm?XpUD>u}4j{vq&J8EFTe3;< z11r{Pp{J^e7_a~rbtLx%&N?r!m(f3&8|p3>_tNyr znv*}KN)N+fErht{EwPixjJ;~C@{wDrizn@B?6_xa`dGmaa&?k`3Jhme1boN!R z?M49Bnu5vbiF=Ha#)QgX_OIw^=vzO5ASsoo4D2&TPbsrN$>~rFt0jI!3^pYqMn@we zM?ySh*TDc)IpGiJrnI=&#scIYjOCDHp0O_YZB4IUT!XLAhPqO0MOQ!Aj6{V@$x4PCnP3H2(^eZzqayyKNI+sf zkR85}frc_UCS(PR$w-Y4#=8q%v4k~|3G>8wndlDyVH#50cA@U8U5@%~X7QI0#(ybQ z`Vj5@fCe93KtPuy1=9tvER;Qn*R0o7q8sDEVFk5VW`e4_UO%pn~47mx>f9@yPie7 zaR?6wrfs2~ATmh<_ef6B^lZ+eZpN77M~22!N!s9`&XNRW-+9+IIB zEa_BI1AWEC112G`S8*x#RYBMs~FNbUvq%!QZix0h3?mDQ-&I`V$q)ks@2Phd*aKb?d4xByxX3mjs zvdjLnV{{o|!m{QNB_+BX*78T66QxOwztyjAmgf1FYwV+1JGP9@i_aAsP>JGEO)3#? 
z4y&(Ro2pa?KK{v(-4l9rUTsDsp5k#`hT*tf?DjyJs(;}g^@7ae$Rrw#p&-( zZsapZH`%e(&(oDLH2O0c8`CQDcX}ry+yaCutN?wA_>dM_t?J9dMBz+Bbg?XNwCKw2 z$FH~s9~j<6(JCL?7;2g@XyAhN#-TB&P8s3I{Nk$iZH{Q+9*kG#_n8 z_jY-cgF0{S-TeA+(Y`O}jep<%L*cK(BX0yDQ3l8EnHoV@yYe4C8r{UpxKd(y>Q+%Z zNSCO65XSeR^^ZZIZ**nVs?%7NhP5*?WF(9ckg`|BTYx9|?mMU#-QH)!Ln_^Kg$K;t@-yww|a1V+@0j5PZX+dJBHK5VG#)eqzQE(<1 z#;9Ng#iUZ3@-YBo0E4Sbv4P+jSKg{z$%z$Bd`4~YnpZzZhT*>n z;4*q@8lT@YV<9(}H3MIoZU5iGC* zI_fKe7!g{iWQwGSJR8V(u>Rym&@vF3WyImzn~ROC1lVScg#913-ub=GsBIID8aB4= zG>sbDZrCJ^ZSGh*wr!^|8r!yQ+ve;(^UgE#9mlu-fc?|D*SfBg^kTy8Qk9ih{aS;0 zzxIt8!is$_&@h2^S#Ff?LIV-IZJK<9JQp~eh zQSLNuL#N3z$4AklAPb?0fvu0BD52SVhizxh2Z4+5Vria!h8hnxr->X-QnkHNqn|PE zQl_#1!BY9N=9z!Z2PAJ9qxld2*Y^R_W>Hl3ALZ#B2$psED8lY#&3X&ib;#Cw>yEL- zd9miX*8SLlwCm3Pf*C_Hd?z9z|_zRNxTYtlR@d>NePF`Krpl z|EQqm*ARP*9CPD%$NKw@Xw&%=U1O7b*thRoR{zkzLhS^dT>jNfNA}G(;%tI@1vPep zk=*ey;;~#fW1@w<^!T@QBL0IdssVgQH!0=2VBfFhddO^1-nDuv)`!J5H@C@&U@LPWuHCZ@O{ zME{u9o+7x^7x>IfzQO#{#nA-K65Y9m;{h`M)|R-MW+bxGBQ2x*-E1 zZkpz>qL^YY6-&CNj)hV#p;V2QiGJm~Z?beV5+mTEH)VSWr4-l-81@(HGjX!cxDw?r z66vRvi&@iJ`M-H|7VjW1>kkiGgQfrn>MGM}bSsuyo*n6e1kR738Pc^+WODVGj)8E3 z+wK9RfieHz4A8$Dq_9i3{YLxhfujRqg4bH0?YeN1k9249dD5HituLRrp<@5PT6nQ< z5HVYm!ts}SUST{)a_sabq#O)NWU@N>y!f!2VM-a za!7j67imYoBygC^cg1T$gzWWu_1Tt)6D_%|grA`Y9{R6_=F<#SWb4&b;;`HKBVuiFF|CCD$=x<~%m!lpc zsI7>=lZKKRQ$oa$`&P>dS0?f#I6>!?X_+-nteAHj`v*bVv+f&U7fjhMD3JYTzyOkP z(cKTfa>r`LFU+(-$S-cUWdXkNau#vaxuI1sE*<2W%L)Fm^Kjc-B8GqFQRfl~EvAMD zeH{@tM>^y&;_7UuY_FU2d$u{HBL6zq(A<(qq0SD2GST|~g)-!6$5I<#rYaWq(lFx@qfSzPih4TK{6gE zTc@?WbLucF)QU|%fVb?>%Eh-!u z=w*mnjI|gI&zQ#AYbxsT;S>7S;B`!<#SpaNQY>s6tSO6_C7&h@Kg2v}zgP5X+IXlyd zfJm)yNiwlVd>AxFv=;zE(5`;0daUHzpU8xGXyn)-cysUV45H{kGzj?vW{U1XQ^Qq% zZyh^od;$$G12ks`7{a^na)C`}knJU`(V0q$o7nuv3QWT*YzT!|`#`!LN>WbrNu^*$ zQEyy1;N<-bB(uY(A!AO17+3T8LZG4p`fqY;X8pV*p3Oa)QtBzfF0+`!(^?6-q&zrdDp5vnPQ$n80?dZbCaQ=HB)`tm4cIU&3<=HLE zf-UaECDi}5G#IKQVrpGodLw*1tplYt7pc)*yDc~2zkBzthF*06MRx>9LP<(3SEM8d z%}jHP3^5FPDiaM6yj>QKJ+`fZccz1KZ$iaSw|*`iUx}XM1?tZC{{v!V;4zVn`J!58 ze2VzrFzI9WyZPy{d<&e)mHT9lw+!N`2*eQoM#(vmuFtb!*ih!yg;Po|$T3ZS7>qGdH8$wU`f!S0l-0_85ee;?%>I@tpQgC#fQ17@D z9>3)4WR{q|ZLYA#kr-~bDuphuNjtHm%U4ydeLoY2V`=2&dd&z)%y;hta!4dguPS+c zBOo`*#Xa_bA2#!AY)UNGp$ac-a9C!pEX8k|@n=}T$XgE@k7eYE{}A{rqZlc?M`fAj z_;GyIQ5u~>^`+!%(T~KK&?j3Q-kq|f9$AR}kFYwVfT?lA^D3D1>1poU$viAQ4qXh9`F0FAJmq%=q={~1q7 z)6zM%{B++T(Sazl#}PV%=vA%{lRbtVpC7BgHl`y>{>Sqo)a~3Y6;edRWwkx|BHMBy zd?<4ms@SR|N{VoA2vxgjFbK@W_oZe$KBe?n%ZioW!YxJKN`s1*$pGZ3xxU|&cW4SHr%$g z$lVxH-+Y91KBY&WAHCa7I*lrIqWk_Ap~$cUiDL!C-nee>X1Dx4-|(KUf%ob!67sW- z-`@tKyll({HD?_!NvHm@^DK0-dkrYi{-a_LK3=oCB>3G*);(JLvm4*;sgG|zX8&iy zt@e`Ki40m)OiFMGJJcNZ*meo9szZE}W(>a)?8MnyVfU*&bk(Q}<}=B@hYil0eji~jlxdxC(9sW2= z7lBD&rGy)=yl0wWg%lSO z2s&lnPrdQoLAKN|ndquG4G;*d*^~s~-dhU?utnWWOP>rlemex8MkU)x2ej9qeXorA ziiz*uxKY1k5mkmCP?-y`?%KC-22tN!1W(nU7FZx)EdxaqJH?0Nh6!CKBd!6QVL(&1 zbt4~Mj1f;Lqld>lCWluGHJqME$-##5Aa6S3_~E6;0`LE50ormkXT#zlAYqUqN1AAgifpoRcGRT#jYJsX?&t2RXXh>M;9DrY5Una*SR zN_kb4@w;`hViOVx6l1#m#q~>a3pPX5{s@D5PKNslKnCaYdvB zw9N32gq)I^pZlRSNdCz;n`=L*WN?(;qDqmp9`XGUURAe=$(LAEM~OqR-?}3_I+;7= z8pM-S50O@qSbiraZT%hJIasclFgL$xwCjw+$n+T#NI#dCfFkcSA?_hweEG`^|LX8e z;~u9HlG{Z)Z{)|J^vN_Q$DsdxYs{Hz=pqHv(xYk`0F3I%9J`D^k94vQNCp?Fx9bBX zlfCBTmLdl4c9g;OTKCbs#O1mt1bKYhe1?bVM^Uuc^&B4@bjtOsfS0E=(YZztxfWCS z3r1_N0qW3QSzFB6ix>%MnMY1iK<&Z{WUtPm(%o;5!(h(7v=0}k7^leaE50nvoT$2VJ29z>0^s!Qx`w!0#&CvNgk@+#@uUN#6x zC5)|b4fI5T2yYqZBa;9x9IlfYMl0@pbg5BCDa}=78%m?St4I(fVi}Aa zKjll-e=xwE6Tp0f89|SX7(|DFdDj2trQ`76B$k5!TdNUcNxhlK@2NuiHMUDU7~ue` zRN}|e$}FR6a;QMJaY4mFGKyRTAf}qhn32ruPX_OV7>S*5h974HZ|wTo3nFTRZ}Fm@ 
z)J&*>%WnV7syYKBU_%x4YLCk7?Z| ztbt{n-k7q)^Nx$rtR1{27ZA@EX9>KfWT*bBBt^%{K+UJ>vqvw8HW!E&a)*z$i!4)v zr-q1@br`%K)5Zp33j+&?%#mUoDZ5=0_yQQ*HecM32!}38FBy4|6mOy3uppQ_RY%_? zpMjo~T$W4i!APEQ@#L+K4A|IamETj!s^I@lScSWh-;0r4s@UzgRq5|rF_bOs-H6MQ z{pwt6&RwfYPS_@$<bg zR{%NEiNQIE&X|=NTC$jbtSqx1I5%mi`mnaIU=kndh%8&%+grfHP#5+3Hl z6U^pgVf;T@1L}{5{Ht6SV2o&<4F8k4>jSppD}@ z+Wlxd=_dDy!Cgwv-?pQVSdyZ55M<8ke52nXs+<2xK@~s#2<2%~ZnrjJ`n~Nb5RU~n zkTg=&`Wcylek~zP+s|7Z2TSAdRdh@^50_9B6HeeqDf3VS8aY85RPs*b1l(FJaM zykc>WG=}0DXCgNlbbI=9AXjiDel%}$k?`WexD_=0E^5SI>((CIwqtR#Kdd42f6nxr zv6g*1odin%pc^EaLUU3pkbTxNlOw@b?D_~#O2lSrcs?@l&S5nx%EKu+g>y}iX%lFr zjN7Ant1H}51njw)L}A8BuZw_%L$z%8voH#?E$Yrzql>M({@}W{a4y(Rk%$#OV25kp zO9?0I5S~Px3$FdtB+5Q>JG^;wZ68?Xp0=G6S_YNYF|H#UF+Kt>YABiBHyjC2fpxZQ zeQ5{`Eni!^5c$hC7SVkPs(x0Khf>fTG}>?NxT=}2Aru3fRY>IC9Dgp{ zar(|%9E3REfDhl{@4?X239sT-+`G=wnEVZD?f=KDL@0rgtN*IFQU~G#w)Xl!Pu`Zt zCoGX8D8Ay=3*`I7p8$e;enMU}f=an|Cbd77Gq)2jz2s1V7bM=dhdOZB@AJXOx9C~V z=zC|&fR&AnA1*Dc%Xcpw5*x(c7yUoUi>5Z)Pi?oXJ70M0mV+66Z$A&qT=Gt2{9C;~ zVh0P?EH6L*Zafxwp1Q4iM?bDTyBtQfdz{-gzVrWFf81@q{LcBbV%-U|(>Up_art_E zl&;@N5;b`HHG98dr^T{r{U8l21JKxN?U722V3l0Aq>)k{PD_Acrh^BDbPrATRQ%LS zVXmsyz{(FHNMM)aH7Fz=^5AUiF-3916&`9t4Tp~^H4K~U5h)(!TqfbzAi9AXy#o>Y zja2EWW~6ZaVqTb?$vUym0)uW}%KSaRQ zwltZ;;CFA1+u|-amv&rrN)y6n=#oEas-46K#w}{xEvZrpJNzV;Dj2+YgAy|iRk#Bd zR|gP32R@4ciU!{vT>@}CL>@m8n1>LiJ74h1a(Z!`novpatcmhWfUcjl$+5pH$#aeF z_HCE!sL5f+NV?!sA&kkzKZ+!IKw+Y{tjlQ4eQY2($};}w=B0-g94UWrlGxazJV-eM$A`iw33km zD;J(@$K(QR$L&B26beE#0}FV#xchx+%~$JUswT3nR-DE9#xsyA3%-!=Khz|2^V99*rM=5j@Z%e4yDbOkAYXPqcs^l2 zb{sY&3SaNN4gfkp2p0DD2 z`AZ}YQ8e&&W}Bx?j<<2T&zZESe4ReH9H1KQ0G~s?MfeM9dO36IwjK44n{7YVx=(8x}B@eOWPNv=`(Zi7#+0l`Rduj#&;wnP;M( z*X%Xc-ASq3K57LCq>BFp(mHf}caUa3tL{PZV5W*yUzHcjem>5+u&pFU^>nICANrP< zHES3ISkHo)v1P$zgcDtHr2wRca27rG#{)XPr$Zj0W9y1u_QNIET^*z!wR8H0fjMd^ z1O%N+n9u%uhU3wjpZueUzrH%R^=AVv{z2V~Z5bPl2+dD`Y{2E0D%VTP@FxI_FV{cn z9HO5X1K|B`sssIb{apva)O*qQ8_;Dml4I7%wfzHW)a@vm|14n=4wUmz{hHH!7kDWY!7#q+UgT}b`Ub|aTFBRTr8H3iQ{dz+zlb>+At z{O&dEuM^Y=P!*CIcvtbPqeSwMiZeC|*Ktm)7DkHhQ2n#f{u&25OVivCkwT2E1=#pZl>FFa8%cW{Hp7aXVlXy&osYD8gT80H6R7n;yuBX`fMvmf+liK9u zlA)G{CF>cxIUENKCg(0+=9YrKBR%%7_?jhGn?>#73=g|x?b0=cddT$aoU0G9@d94f z8wOO9v3T?#-~>rSa*mg;{deU>TE)LUAc5MOTf-@S#T+wz8R=e^NO+}itN+3o*s6HTBTw&c&H zsvc8NE-T11le2pm`TcX=8WV^z=`r}aUA9QX09rS;kSPw4gSJh~WsGr>a{)!l{X(0?sJVRDaQ$%E>Ldp<+5Nx=a=d z{>)yJJbDuvL{N&hOvkr-I#T3p`73lNk-=(znKdrDq1MRB{{Y1|LjmrloEdw z(e80nALfVBVxg6z#V2jIkhRW^;XoS_BjT47Y;LA6-1F0?e)ua=g>4sfm|!fx1#0-q zj5yL_0og=|XmJ`;#yqNw(32IowvhFgjxY-GTuX>OT$*z2{>jV2&uJ-N{f7h^@K#dndMm}4UJ-MAZPY;eOS zDF4cvWw7w?Do(APj^6kPU~G5|8Z243yF|0!fps{gnm@4o+byZRCSDUYn$KR&@qoVS zV-Mv(SET@&mh2#IlP%Ua3iBzy69JM*VTkHchxa2>d=9z(qHm^?DkM;xKY+S+{n!yTY&np_88Ta?9km=$;mEu8*S)wqUo5$DUiM%r;(^1n)=; z2PILxqDn61UrOnB2@IxQG2zyHf7K2%CtOr5YZ~l!&NOI9O{HdKBTgH)wr+m`!;Hi> zPB2HmU@A6Z{Y8LJf)NAbhEurNU_h4E{dW%5*w-BPBd_A9d5J4O1~@=A;9K&aA8t=) z$(`-x!Q*GeI z5gx{t3xvK&h(*C2Uxi+S3;sw z+pVTTUu(KLCFx)h7txqv&!LoHVm#i7;%Xnn5An<*7DGAgX(qEZBX3p)Q^5%-&&$^*Bs>5kp-5T#Nh7`oaX|@t-1Ss8dMaqAR zem26`_bX(!#W#~Rw-*faj!S!qS1K1;r*%7IYG~JZ>iC>Sct$_Bp$7lqkA+ty@k=VU zJGnt%@pXSmhjkXwmwt`N^wPWi{i-73Qv*;M3`wv8XlH4in4LE5zbHwdQ6Q?Vt3YZ~ z&wqPQjeB7^Ri16Q`@)g<-7O>Q!V|yh68lnr-YADD zs4FO35$dy*8a;BvM@wE?cs}_$6)rd(_L$aOXNA-^*T4jHeBIfg zHxq;1B>vO}eM!5h=gLGAiX^QmEv+ZK9SJqa${qkfytKS_=x~vVYVkmG5Ggj%(lw$5 zqoEZD-9AH+Nh!`PgA%V0zhqb1vYRTW)Uk5bbRbEzzW$`eMmYq(=T6xWz2e8i=ORe9(y=7sul{m?{6c41Yu{Q+aZ~>d8Ts z+r3enA+{V1Y5}f=84`tSXGeZbK&C~c9^^O+E_FCikCoZqwFV$&j1^041aKrUF!kl~ z)fN{>KhjrSj-6vxyf<2uXwf-TLY%*wO+*J|0mOcw#BdR*6v=EKDA909I(1Fi;+#yW 
zVWG`b9+RS@Ie?>b-r-35v$-f-Wr*3c-{8%s!Bbw~z=cAS`Q)lA7%5yp+wXiLKaggl{Ej2~Jq1NR5wE86>v6 zbZO{NhD7q#)K)tr9*CUOA6B==j}!vLP%1d(brBpm5)uE`&5mc26gg8AMTMIzmmQ98 zaPm&7F5h#iZR_{uyC+>ctntWf5>+B;&LZf$zER6Py0PC@PbdBwyl7l+FSVEJK{-G! z^+GHSEm81;y29w~u(s-o9fz~ZISR<#{Ed-|*HUx<4c~T{+3O~TNI7`kd2F&rg14wl zR&-ivLw|~Ix3ZziS^dd|7DT|WtHub=PqYM=yYY489hL18iI{v)Y+sH*lIQM~0obV) z2{Dm@tLYOqix8(QBmywb&ucYCI3DqABPhFCby&ZEY_T-+tn`An_1^l{ zM&99Jzu4q?kzU-A)2nL8ocFnauMr3i|9TjgaAw3mx!*c%8rP=DP2}OiLh1uV)rP+> z{*LC?dd(+f@5CJ_9-x4ch)U|wVJ7S!#4B^EFZMjJq+et}f0B>a5xM59YK)wID4s78 z)lcY%Ghuh(0&oBUOzI;&3q2=!{AvdAu(IxMhy zdyIXARIa{7JZfEbLTf)U=1)nLr7sHl-m%|gR{eHi71Cpu(|FD!w&Iy2k zxa#$P(yagdx*G)hv&%Z&_`C*zu(=BURlA|Plpi^eM;pj(-`ln?H$hjWbx-#x^2{rl zq7eznKOk!Ux=!%^OD z6C_-L0W>x}5dj&gO?m9p=u2yB1-GD5GU%0nE;)J!L9-s$HY6!)vnF(ZSl1?IlQyLy z?K?hFM!aR~!-7q+VrUwQI&S_5nk;PivHMum3(Y&4Jx9Q0`uUSU60AMGXo)tMqt#!g z5TwqeLv+5zfHM)J+w@ygsSRQ@P&8D861EI73p6Kk^5y;fRP&GETyl{wp`bJ^8{)5_ z2SR~e7&m|8xW~f<-lLs|{^A7OT5+F#oL6)Cn$KR<9J+pkTlZvrjNIFwaT+WshIkp{ zT~rJ(Z~5UTx_#lbHh%s0KsN4FLBj>r9Rj7p?zb|{U>7O1B3Rm(2?fZpLsejBp(3DR z>ld&$d!NkymupPyz?D3`$$g0r_gTEzUvxo5hu-y}tax5J6O=PK9xJ=F=!UO42P!F- zqgz=~>G*g?g&`$`xz*O?M@~L`!R{NIXQk%hIxf;*#>-*ChjXjRjfY~#PU-`{3kl8w z@ebG!IetgZAr``kWNiU-blEv*|i3JMyGy!vapa+MbSlLN0fu2+!3=NF`{)ZT&=zqiecg~Q%44I-KjQp&Cw5R6eYaj-c=&{mdGKAl5Wq>f(C54%vVID zzn2uv8Jb)YFM|waJ*k|5+ga1|nuH=k_Yz#=l#-kVw0Pf6*0#xUcUmw1jAsdLi)oIm zgN)NNA)Glqr!gJ~1aOYn8Z79xXJ=E`nWr&)nL^c^8hpR-xcHA!QCy@}!CIa*IW(JHFsP*i=!O5`hv!1Jwe{_J z1#xx;i&w&9!v8#;L`9mriE=>0UI=1}5x#cv}Vq#q9tj=oA ze>G`X5bu*z1w#Z8-5NVOpc~gbS!LO&Rrp)rrjBb{Sc1G>A9_FDU-gWQ3t#EQ<$5UPA9uMHbzvJ|j8!UAl(4{WO2{5dOGQc#l#fDxS1S`2up<&#y zKmO>032|JHx{S|5>XrZ10drvBmMpk~h&3b47#1tJ@QT%IBrARLzq~O3mn?BNm1heG zHT=6d32&O278o|xBgjpg|9wJeVe*y;DZMvmNBjZ(H8UY4bfWzQk!NOr0@&r|f2(g; zBLzqknFNQMrx+BS7vZb;p42FDf+dVLs&VDOo70Q}+f*W}?cVff3dn#~5>GRqa%d^S zHDJOfpqOAY)Vz4p?zi_CCy~Eq91|E5ZZ68fKeggFDcRi8!eY9M^=r`30opu{Qdl2%N*i}33IF7i(~2~! z)In_Pxw8}g4_QP*+F(dW{!hU9|Cp~_2tep{1hoM8#?wrg#uESk(E=P&U7O;AB7k?Z zpCQme2=*HgAsRSA-&pac_2%|An+-eZw);IB<@(SIpP2jNQJpI17UoM12*!4n2NS-Q z=BYUG7MA=0IQSht(M|8+48MxgvJmQJ0=(XpsZ2{d2qk!%QXlkq?vK-q^TaMX++ zGg`yJ8Ry23Am~%=@2}}$4Ge3Vl_VVc+U`V24RDd4qjSnA)!M8DfDg%0Lkw$|FgF-A6^h?Fjqpl({+4Al=N3`W&|}5(P4T(KN#zdcHRRzsOA-%UMS^ z1=V+u$C;)CKstS#S|+>>qlp|Lx`8YQN%}7DG_c|3^nKlA`)Cg)_B3gbs+rL*c87(d zeN2@wd0~9wp!!K-#(pdr)6a`Tdoi`nG5sV30iBcr)jXFmBOEOCVfQ82JzG(0IR_;K z9ci0}dUa{a4Z~Ps=VSI>YMoh;z`13hcsFp6se0>FR6bl$mUS32{v>|C5sqQQU`PVZ zP7~Al*}KKr6}L=?zYHJ^fZ^g2H$deEWVG0EW>AE6c(neA91ygl!ZTKl>sQPQB^rsJ zspG@D@n!lN@V9PjM=(O(!8=@FUfeWUl zPx=O>HOu~wZ|O~D{Df%Fe!LpIu##AX6`1rG z=nT5eevQ~={P-vQhMokj{8V&pK8N5A)wwwkpty#~#Qn+hncDvh$hh*65WwkhA6DB! 
z2fivkff&TiDCB-(Ob?KqH(Zp2L1MH$e@PzoqL#P=<6n5Zlsa)8(vz3M-j z+R_dyNW$+AssxTQ9SJriF6U*KX?Jk+DV662CF2GDSJPj)IRV>q24udJxgM0cNo>Cr zYzU;R0HxVMxWYRKp<}E_8E~&Hosd?}z$6EEcv7Puo8t0dVUWY@F{7poMb`K$xScF` z0N93=baEKDEoW|ja?8^Ml#bsT=UF9w^^u584?*c$M*~y@#}_~IhRb~+O*7$KxNjXn z!G@+HUWb={orMzA+_Z=y}lPHanTUKp=792>kfuSdh3mmPT z$Q9iOM&cCb#U9X`e30DU14qAlOn>7l!=;Qu!5LBj2lL^7sfpWR2&e_Onriwtl)X9v z77}@Ur#HOLmO>_8eu)V>fH_^Ne*z|&TLV5xX(ga=P$3=1M2~K(Qs*@SMFl1e z8wr1RJT`l#vCzNBNVM4i;d^L`?YF5c;0%6Yg89PO!J~%E@wcdjZDtveUzL!EFh{#W zNBcFFc*_wgC4|<5RQD-=>n1p#Eg+`>^)5P4=8uak)9#Gs9uUa<#7lXsphiAxSH``Z z+hZ+6SqteZBqn5Wx0YJR+|d)Dah zJCjlo#I7^QJ=vZ6Ket6LKS`(*si7_+p=0VVfoH30#AKF~5XzOMxAF8N6dR=CMn_RTBDn~!GX}UN$IjJFo zh^h>cdo#OE=8NwT_=Jg(e9SD>muv!=(cqiX4JnR>w2~&QUk9RWSH4D?Ae7#7|7i6- z=XRJVC(iU~(h8&&*vE00;Pu{Y-)gb6^{Gd$D%v^;ayr%9uPLmySgtA0rme$APrzd= zM3sV@E1=UffD>#1E0XaVc%O(uf1Meqvu+Wq7ee8!*G#dfL{@44dOA3u?_dSb9x0cW z7PLHo(?vvtH5yC7~}KxM^$guOlBCU$0JafvIw6Y*^T` z7mZ(TjNYB&orRnov}p~YL&*-$dRtVX@SkTWo-DUxWG<;XQ_Fq-RAT?tmESh5iKKNH zO#y4~g3KC2WwK(3Xijp=`h6IAr;0hzu+fwHnEM@~(7DKZ>*rz@u~cu!R4|sq67Cco zV>ZUysINOrbd^O^KN4g;wzR|`fGC2jIn&Z4 zOzK!82%3&()=#iRdQ2PE?ct1dG&y%au!lYt1CmFA>JU>lE0y*~P2Kmp9FY z5G26C{%0QHBh!BFAKjY0eAz=n23Ef8+it%fdXaZ!bIs`w5`s%)UKurg)L3q_UUFmW zc)gsO1PJ7%l1x2cz7p_yUjDW~Ue%U_-=e0WIr=+NAUwlP7(3SlhI}FVR?wM!5SgW7 zs-WlBi>7jy7kC54HCPUXF+FI8_sDGRu>nO->l6D8xk?iO-OzCA*fHl9opNU896}gv z;%14QDpe=zRhZB+n;}NLsNSqi?;pARDGQg}DjSGYTTiBZkP90?fw!H~GJVH$clBlqN(S8H!5#ufF!M%4xIfnPhIU*B zTb1nQ**Jg9oxh|kxkz^(6s!hTvioktc9Rw}T>4M{m0D`DAus=33w_c2DRguyvt4W$ z9n^Z+PMlPs4!x-5MacU!)yu@Y1oQsxghMp+GA)^-6$J;Cn~w0wQxu0tM1*`#zskW) znx}$W3_WlkUofy-SmOjE5u|5yjTVDPVd7~`;3gjW+LxtWVGTy0jZ~tGGX!n9?l$WT zLq|NM+Kkyi0u*U?NV(NZwwis2`!)W}d0l54m}rY+7G(@5o-Hh^(qde$bAt6TkbTDL z-%IFj_F3fhR%2AFc}5i}n`QsuG-a(;Az1ONuwhzzargpTITE^i&!WWW^li%3jYCHH zR2(b*Vw!6StN=^CuYlkX&yKHJ)L%|bEzjH3r7|TmOtLF6HNOdIgPvRJ%SvXLJx~4W zK}vO{g7eI8jZc(c{Ue1Lby`uRadJY#Hqz-AHuQ+Bj+Ho2c>G5e9g5n8LxLom38oU= z@`r6QgU0t~*Dk$S9=qKj5mOGABADR+#>j*;E6CoShiGlr7+;@fT)bf~Id9j_p@AKr zK#nfzjyG)tAd>&N5vqT0IXy^o2XKpq*C`>H+ltaG75=!zx{ z9uBULNVc;pm;mVCqcxrinxIDZqySq*ADBf#H~uzMb{0y!a5NA)9L5|zQ+Ttb4<~+R zpptdUV3{-r?N}{gy7MfO$rVd^*{%aQ{G3Ebz1cmgruT_WArAwFQ&(Xp`=Rn^5TSa_TJxxMwa?zHHDfp;yN~|lP z^?R42Z6a1~rx1{2$PlM@(PAtvt0)6&+f8a1w;Wk*vYZ57=uRZdQ2vrUJ{X0rEz&*2 zqI<45EH~k4~IXgHlR>V42`= ztZfz$yWo*62?4M%_OlhyFW+zKRw=#=xwlV72=8bKK@FPAMq18jIXip;oWdzE)7v#I zoX57(ZKQ9}z~$3(F;)ow0^y=jqr&YO`!{gX7Ot{F6zF#99M2nk1=TMZat-alhJvpa zBB79;nx84MNQ!jsGtb6UY`i#hmHCVGE}7e#(LTp>eLwmCQe$@sbUrkN^C|F6+(uPw zfx)lDVu1bg--3QqzmDT-zx#a$UcGSz3w&sHIHe!dug!eqZbL_@x%Qdcb7x^Nzg zQ1=4c@2&he%N}(2=-*J`pwOQDdz`D=1IYsTP2aE3Z`UJ1A4#U*lVd9wYC;&sV%L@5 zy>}WBLg1SMcL!@FJ~(#6w{yYR129v%lISi3QTJSs=A&nz-cog`Ydck3cAx3K!r5B3Ue^vSjtiABK&k+BXYuZiHtbK%IAR-Ead=wzz>9lS- zcj*icUaSoM`1sw>i4=**?a!1G;cLH!zzs^LwR8)AXs)G=PFznCl5b+b_1_ydk z+^%}OB5Qx_aAmAq*HvPD%L%?jhDz(3iWmMG%X8k5FT%4`_SM`dIJJG=6-5JOdqeHq$e1eW?*n~`%7rDhApb1Im>@lQ^?nOWTm6faDzTY!)2^bl;h>S^w*Rga*q zfUF#7mLWR2%vXtn^>uXKB+D85VXd@3FF5l@fc9tyWYrWZ z)^#)3r&bx6$L@!}e>;)tjq(QS*SlrwN!nzX4kY-NNQcET7z&SuKlgQ~o@yzVtC|wT z_Gg|h`rYQ&d4;#@fnV~$eEvzQ{QM5o1v<7^nr;_e&J?!5wMYB zCZdnb4dY8m^Dbpr+mpB8y|52EZa0bVO<}rTKTpF~5kGPu-co^lpNQXs(yQ9wDm$L& z{J2s;#e2N!=w?*<ZQqk$M^K>NF@>r(48pO8=;FQuI&eK<~Gh_bxh}a{D*F7+v zUfVf1I@R4dyGjU8W1x}Sq(T;gW?#x2j#kbJeinaDm0+#3VP0vg-2chnb;(V5RHl}@ zAt2bj>Cm9)VTCax{v|2$Yp-&)XTtK&2Ag^v|L#~J*Q-jN?T*z|44h~c*^-nv6ZD`L zO9k5GoQ!u4>C&%Hw92v1DGOf+r_rA^B+SmHBInO3paS0pSK`lJ|DIj`NwD;MdpOwa z+FF0?+QA&vdtCq3OVnE73mQ$LPtx_qv!$JRU(af(%e89ZS0bfSGI0C_-aew4I@|~> z>Kz)Q<(BT4t^udX%gNp~GeuG07G 
zFi+T!VJk$lBb>ki_jpv*$GeHuZjBBxAt0;oy~&j0|G<=!)27avux`(J z`2r1{lQbSW5v1s_SE1XPb#U}&TG7JI$PC4K^WA>RO1}I3aUCX|-6r6p!@c3o7*KNd zf&lT;_3cNIh|Ksrl@0!RBz3dP)h4TJ)zAt^7rykw>3cybliU+@pPr*t)_XMs3;J|~ z>VNse5PPCf6;b8Ph_CS@rm$E611ZY9QhXpT*`RG_=_s()-5Ako;`OVb#~zFG!`2!D_*IH}qZE|K5Nvbah26N!4sWzU(Nkz^~M!R_zD%7`9ZbJ+-0l z5GN5a)x^MRxmY9FH$6ixx9l1N$g)Fp>*3wX7G`ayrAU+z;B(02IMhqJW?m(peB9fn z1KiUz&eA?d5Cg^4gPMR0JlM@dOPm2}6DO%NImzvF=3v`w0=j-J^#^Ic9~iJUR&_!F zChLlKLINfjB(5tQ9sVM4ho$pKxAT9Km?GW9megVb-Y-N-7CJM2;PI@QQ0+rQBmwSk z!7flLSSqN*X~!Q1S!0{Tc{S21)x7b}Obx{dS@D>>z@P_B5cxcd`h1M;8@7FRRK{a9 zL_r|C_53}jIAsTo0tWY<|Iz8`wITmrunuRyh^eKqzXZE(y4x0qVbd2EppW&3`^ zh)3QoE0C{~03SO!C+zzhYqU(?`6f=io^@+zH)b}l;oTU}FyG0KhTiZL;lv<~luBRL zB#x&yoMQ#doa%PZg(f7hEM|m8j z74&*eDt#Kc@9OJu7N80FlT5T>q&PKn?iD?N14c9mpZ6yJ5v|tMcfgr;2#m5>ZsF(k zbRao8p{|O$M+%X5)~-^cGqc!}P~+omzJaf?qa6yL~v6XBuOF$Ci<+=2`-+iS7e@x^ShhfnnSldk#9M|$C38Is71 zXL|o2)m_{ICoVgmn=no{I+CH{l{KG&?N1Q!>S$gDdzvwU@EdM+$;PzJ5hzjc%pu-X+xPB;nt_FB-J_} z;@pe`1LL>1)s^IAYHA7_N!DmB0Oxbns@)2gPUhM5e6Ps|W!Y%*+c4DJ2$imw=u7&5 zd2TDCG_6YiK4k($l@j8;skSYykbUuv_=-8TGo`KDFFyKp52dg))EQhWY7Hwj)8sOck0SM2JZbdU zhY$1-hMjmDnX0~Bef_P&RCT@`mb;v~lWv4$raZl5sa{^GVe33TOmdw`{NR8!j)A;c zyuy#+*GS$WmlG0T;b}?mT@Fv8y;H0eKgsx;CxI+qELNPUTDSE$P$gBloUT{*3M&1vT~3NeCu^S{LBn^ zuh9QKxSUX6knh9IFOLo^Ro{i_1BA+|XY|kEE zJF%l`M3af*O;K$7j%(S6{=&r`ht~_8s;k`Ss_kxY)wg+#&uz|}&&!(VVe}%0!;VW% za$_DI!T%7LG^}f#!ZP(25Cg}Mo3*kd-#&*SlGRR-Rl}>?SLnaD(mqJMmX*122jQ8l zb-Y3Jh!0oU=?j%`^D4$Uy20HVgx335(nQ&g>{5(+C9Br))J58VBndUy`irz>bm*)6 zT{FZL_OK}$xF5SezxPM6VKY5Hm4D}vc;v2I&%)3&q4kinJdn+L(|yt{(J#8KS<)#Z z3{iXWm}M*0p3xT?h85LBeE6EpKwBQD$UPwF)~ysFJM!7N6SXdq!60Kt?y&k5Smc#Y ze^X82n;;3|#mhWxtyam$N$TCK3KuDgObp=HCL0z+c>!B5Ai!)?iS4Nm-Kpe$RYpwC z6mu1ze29m>hJ~r!?P|FyIoHq;I{JRJb7d^p>dbD6Kpy7fq%Mc{ah0Na(yi_={5pFv z1sNOWfX;ja`SlhpM%N-8p8Y5jz2$@u{p(>I$QS;)B`Mb3{{w;4)G;xGV@0Xf0Y5aM zj7u~!G$-uAl`>B->I#gMiVGoW=NHIC- zFtI@a3bk#=G`Q_>@KPCy0IjJAXi?3zoFV(Wgww#4HP1)BE5GzNOzKmBA-^E58z?CJV>8RhdtInqJ0TgAtrr7>!2n#uy;##2Y>>U&I#5eF%0` zXn!yKRj<@G{$>f}HFpuwi}&VHuk602je2K)-MXsiFzw}d#=j=sMgN8S_*Vg;Z;S`Z zmwXXVo*01Q8awq$Aw`zncKhdUh_@E>`EG;*8qwa{+PqVuWT_K^++1-(mQ*%SV~=!ZJe1G3&zhJyM=tH}sqXs+)P zrWlJo(VHVOdtxAf3Yro|1Pemb1Sfqr?1;GYZuJY!I#^({#4lvL)v-}0BnoNKMiEbX zUs5QUw9uJPa9 zf+8^_KkTK+5DNT4yPY}9uBsC4yu+*ZD@8{6%t$@?HPs|&)_PS$V#oTcPyO}CN;w=N z!~`kOjs{YJPD>0GQf&E6%7HH(wuB@cj5&fC@JN~Bh|RgJu=N-Z#;ken^@>nm8FAEj zl9ou59iMrq9=;8b&brKhLU}3tu;$hazMLv=OuNa7mf5@nl05X88ppPiT?nw7$Ki)1 zuSmYtE;;IIAJdKLIlO7B+}qL%K207wt>2?UP^4uO%L2U&njP;OK|Wh zw}qm9u~M;Nn%rvHDxAqxhvFR%1PQ0U$8+tp)TwOewJ;A zmGWz`(byI~6yJov8u>hRG}Vb}4dYlHj5)MKptoTxAX$*ir z@f#>leLr&n08{$*fNPp#C~PPoKdg3Ul8%{zE%%!P9&naSYh6$XUS;FcSwB#Us0;0y z!P{p=u14gPgGgtz_Bb*1h_ZQDwMK}UT^I$Z1c*=f4IB-c2@hK1w%+6CEbo6>QoPis zFoYvqn`~p&q~-x5wr8n)?o@GY!y5oX0ax5A*g9 z$ADx(V$ z?;3tk+<+EBnkKfoPH-JPN{99|XWWbVU!-4kHrL|KwS+Rf{49Zbw8tw;xLT#saX+av zU8IZQMXv;t@M0*m#wZv+#E_!V5H+g(HHI%#nBj-H@C!$ZRw#xp z3q}ziZlOGQFssH#a%NdZd8IEtr}ovYw)#kmX)3Qp#?I1X*H!|E6Hg6*=_{%DEC+;N z8HIi3c*sl(;Lb9FSFP6|)MojjEOD@+g|B536jL&7+cnU79(eUW#k^gNGQLXK=Q1<} zA`Vj*_MID7;11&);dnWjkMI66D$!9CDBpKMYTqY8?4Q$!56awl3Dyt*5((VLiW@k6 zj3^1BzVJLN{)S9#-R-DzP9W~Wnw_S`ImE#wq`e{1MQk1<0~D-XwARm?vz||ITAKEf zwv3p_XhKRpO_RNpI>xR=YpC&&g7xNHfg!HIK3J2< zf|0f18;m$-a%4urnP>9CP!zrl_q_l&4*>lhL^n)^w`mI>!jB=kF5HN2?xjO3in<;|9wfvGXu#k z4VC0Zj)cCa1zg2`v0Dg~a4nz%nCF}-qb0ErvBUP=08&F5@zOEP?bHS3{)`#aAEx0M0;$kwvkco8zHPcOsH8A$w5{eSx9N#96@0-QqvI=(!2)l8<5Av8;*2 zEHkq+ermA=VUmV@N4tOt?q$sOgtjF3U8MEV+>=vTQS_c}TC}c>DpNe4>$BDzE->r;(8un(o zU)JStTGDipF-y`xiwa%(n_ThN{4LSRDiGKSL^g_k|nCbBAf=$GF 
z-S5!AahQ+&J3Hvkmmh+h9Yim(KYZ87%=UuW_SXvp`4k7D4K?m#e?UHJfQTjpQ9P3R02)W+P^tNo$1r?Bz7C z_$fy>=TshCFlOqfskhLf{M9(ZUX3utFVKe9`$8Xunw7>|H%u!B4#-Og9O6C1=M`(LAc6CMcx2;s@^{e(~ANh)_hpOK*>zN=O! z4CQJ=J>1Salr{@GK)(UbIUJiWKg)bH9fd0kmn`UrC!zBrZw$^WS6y1(Zqtmq5JaDh zE_b6QHqGtW>WIYuKlADX^+W#$+lLLA&A@brrU=o_O9w+}b_k?1m^#aq#uto?e;RiQ z8#t^Z`bH~A2P?j+(YW@k*R<94_ML0;<%rj=F$Lor;&by!ENJB%^3P3Mk`?wcIF0$C z)+?qNhmFyX9HM$uz!I;A`MG?}kb}s}7Iop!x(@N^l>LH>@>!3&4`#Lcf1oh?gL4qJj zwvH7(1sjSp176J4V)nTl-c&YGczc#_i?W~|?jnbWDGGPZDQ6Wu*YPWf3X)QC;WIIr z6wzb8eSqfP;(U;Pr*-oCbkk<}!Tg?(gU`LKk?yk&%7xi7=TDn@Rscm}*JZjcaggdm-NO9o9Tppye_}3IZ);uge&`o#gl_@y}}QOgtX$u&GA8e%A%ti zo$0^v@jFlsrSG30zqEEsed3`b$WWB_!BK!rPHtjVOn`jg><+S@E2 z){zd$o$1myB(BsK?WZRS1>8c$C_+`^C%zCNpolD=Z3uXIM!7}3ay~~O25hOjXWdP| ztJ!Z4kK&8;<0Eug5^LMJCWTjpkyQ#+d0(K0?oj6LE+Ldj(MDJRB2nZVKZQocc%aNO zr$`2l?c&a7hW;XhbodFDJ%fZUK>-Y9BbD)lxmRtbLGFr7_XYpG8NrVi$~YSm6lK*K zjMKd9G=;R=fO_*rsK?+W61@?|3~s2L4#m&`_!Gq38^B4mYwgR|_MiPyy?|(il`y6K zJ(20jI6AW`V<9WH55xpN$?NzAUcs~%^=~6WuHAb;=W?gGjY_8-$*Y4F7anQ%wls=Z zEg*6T32={x4QqUAu-0x$VHb7ZwQocaFgB#6Y9{7X47r-hQ3+Lv7#Td zH3kKvhdT;7hPj?8Fl#(NUpllS(**L;ilk zzNur$!B|GT`{nuJ7QvE#seWStJ045m8WK;cUbYLd3><)leGXx#6SFX(_Vj5d7!<#{6v?>7vrnso z(ZvlDvxKU70u3aHk+wpwF_AnmWh(N`JdHe;naG#5QzYVx?@l=F@bq!B11cxSJp6&s zfHZ8%2Z^|=l1l%L9&#K>IM=tf=Vtayvl9c&)ELx36mUsw78gC+dwAvKG-CxtgaYU> zUVU$~CqisG?rTZOUNc^=CcD%s+bREF7-mXi0dz7-2l-)I9z>l~$zeb!sp=~pcgCgk zFbc{pYz($R-RF$%D+q8;O(Ct?6mL@V_^g4ywS#}J7S`D`VG8;&0hL{m#=9^; zm(E;`OJ~0~86JWIp$C|-2B8;vIr*8i+3M2}HPUX?KIco%uq5-(r{9Y0&|jbB20DLK zf>ZUerSj zbg?6p*x&n^N;}__j`e>zNj@ZUpcEzrGETZsxs!0=M`auc@_Y!Z`y!#h%2G(`iaqu1 zjLH60(})1vpAdRk!j6qW;11y{{}U2>l!e%tKsZH zq)=kMWwTfK-KD;{;N-x+=u9|y_l7xIA)1Tg(9l|<$q5{uj<6=ma-R94vwa{RfL)5d*uYuzDBNLcjMxn2!!X)00q1$pE z9R%kmic<7a_=-Q04zd4cE68WWwD^Nila@Nyd+ff)XDEN&4%OF#c2Mo3{x-7zkCX$0#&sv;b~2y2*~4b~n@T{yf%yHV3we8mVXgU!!;!GSOa2?wqLItG zzK4wja_*#woM>Q%3~)+S;6;R6`#)LHs;mnlNcCw*XUj+D-D7M1e_v_(M86N|P!R>8 zikc;GJy1TdV$#Soeji~ui^#wJrUZ*V_Z-kMSVqR08skvH&7V#e-G%!hW#>EX>yqYK z!_6SNBj(hGk%)7;Vm@2e-fP#woxHP{^(VYRY%&~L72+X`Ry14kY@uauinS$_A2tIO zERtzP3aTT=Jur)tBMGSpou;(X@9ADgW5^Nbt~HN&bCf1Zl921tZ@t#-C6Q4%TirM= zNqtJPY*EQMtkQmOU^J%Hob=#H+;3rblR`@~F5-_&AoYAGJ?ZP7K6M$Xmpr;33Sd<} zE?EBZ&AQw%lj_90%XE6mlA8h0X5PqyVbQ4)qn4IrY@DI1mEDhCT>2Hoz2VjoNgTgn zp6%Q&cr>(lqw#ZPm}=Nz+0aYs+kio^5FgR?WQ4xj`dnckmul_leV_iIBC>M$=^F|l zP8>A-#jN0=Wg){5m4)SdPXq=*L0LNe*Dbw?3dUzIYg?=$BF(y=P#B0#Q4Eo%WM|9Y z7EX=CzG__aC!NXl-7_ghBa8$KKJF&qr+18z(Z3Z4fN6{ElrB25>8Dvx)AjgG#0{IAn;Q(}jJmZ7@#P`jwr z``4HF?Cj8-f59x(EA{TZ)!0eQ_T0@^{QjK0_f`S`8nET-3H2MI1!!AyN{nCJYPjNg z8n^0D)Tf*ALX7p02FqkT!)4-`K>s<~VcMdM^Y%b<6CQ!weW=c@nk;^#M=_4)StEmRG3ny7X7dkO6| z<|Xl}Z+E~HZR*`z(US7yaPx(*n_R1QK5)_R2s%+O?dB;x35K&UAWjRT`hLSG?AjlH zp}$|aB*R|kAsFkyWg%7+%7?t=g4aRpAAn}Cte`zk37|xu&sG@s$+UD20U(43gfpSx z>36}Ymxx3Zx=hWBhC3jgrP^`k{F%QcAn~tAqLlGKMj*$0Em#l90Ia*GTTd(hh_!g* zB?^_TMV#*te7O&-fg|Gw5dWI6gq#YK|KVi|rvYcrrO@xS7f;&}@q)^_M60r$*NNXS zX8H;p^IvF+!bPA7!hftv=*z^rQ9SHgTQhHA$sraR=Y?pLIPp#gLwMzF?~IrT5j0z8 z1MQ2`>%VZ?Qv?#Z`s&j&oS@IQ#|EyPr{#a(iTpjGo+e}-qIhJi`}1Yh?VBf%jyKkc z!Vvqp*Dr^<1S{G0&981phyb9kz5!cnu(jB+0;m~zX_48QRU%=~F+@npkc#`}vH~5I znele%g*%k4``hNA=@G@3Jnqrh_?vQ0zDP?we1oMR+_CE9IUN(aWo1L#c^V?XsJ_6Si1mWd)s{J~}73pqW*RoBbC-erKB z<8QhN=GXWI@T$1F%XI{OcS^?%&%owZ(|`qJfd<_(msa>s0l=t6*+H6pvhfmX(UyREUp05-}!xIEuvsf=1V2!uYh zY(hcqL)5ttO_0cl?+8us8v-2*@c})eYfqP%t|-tQ(w}P&QQ1Lfbr)tC*)IjVTTQ!f zK?4YsnE`+DE`5FXZ;bM8!VbIfd&rfy8xJ>px)<3XykJ5b{PvUVO(e1L0kT1U@qY*{ z^!KisFqEkK%&*&y`Qw=y5w{5i`Y!)Jjq5LtUrS9kE0_G>+Ai|Apq~-{F+3|)Ayzpw z1Qh~ssRfYSR7SYaXq*0KpRvRnXfB3uvcQyu!FHWf=*VQ`0jo;*SlC!EGkL|^Q~{a? 
zP20QW3~Nc#uaN)@_>k%Hyd8q4r@tI^o|sVBOiOIX5nbF4Yb=Qu;G+_ecw67t`&)fai??X&anErR8~!^W8yw~Ur74Alcp&n22Pnc__q00~!LQe9OD zEXuW+ruw&>TV_HzV#aUA@>dXazMs-=7KUjvRZ6! z4eN71TMl=90Hc|XRJnej;zCI@F>m0xD-dtn;p1`>^igX5akp0IU3i8=ONaOmgcal% zpVh1E)yP51i{}R@2gCxHAvZ$sif?gz!JX1s^E`_8I5bh$<3xZaj_gD`bOu`d8XXGA zcAdka;xm7@xOT>8nZxk*2V4gmW6i!pi3%sGE6zT;? zn-R5Bp^^;u_?Q@%9?s2$-BoN(l?cZdYiuI#)&G0*^=N20n zcLeMy2`VcMjozIUTK1l}YW;>2*;({`#Bz%sDXKH=Opf=yFoaeG4=?$hy=_||7BkoD zqZI*-Z>zr?@7vo=)FH2W?Wa@5^)LfxZ+_1A~ns6%!DG0d0aFL~Dh9g8aU4`UbRk8pm zOA_LcQIjz@M2nZEqyq;JCS&-^=#L~CH7YNv$7Pnq6fc*|Y@L+--Fl-J&S(U$3Kpkq zu6%GLsZY~EF^(_Ql@!xDfHloH`_H2=M181F0?DdUKcos~0QWh(s~L0ZPSWJK8A zd}>5oDm4bUrwaN$k-{GhRp97D z6KLN@jR@C6%Wta~=$P{bx{)NLi2gF!iOMyR%~g<8R|B5rXs#ql>GqblsGj+w2J83$ ztV{*z_^!@y!*PxhVoDiUdjaiL7963Gm>2DxK~rJb{^2lK!^aJ~i$ynHWkMUp-I>BD zJ;_aBM}SZR2Jy19amtKyYEiEHtmF!Dg?=7>x(H3FZx@R*)w!go%|Jn4M=6rI$}Q|0 zPUD@#n;b2o^SvYUp0BJ>GaS%H;Biq@U|ypHALdr{>tfi~CewpPg;@E>B{S;97=Qt5 z&f;!M30`d!pU00q!9}tkxr;;myUNx*Ar<59ycg5QeAAXw{(n5%2dk0KnY$OM%(??o zZBe8n?To14=>_(eLTc`%-;XNUz;0PKzOP+dTVB=Ey3O>>9~cE6Y~Ty<)7&!#$w8O5 z<$ql)98(jd`UUB_H!j^i4$5=BzagL@N6eyKG4BMEAc9q61tTY)4t z4@%D#$DdHd<~P7Nh$pH@r(`>PI92S;PZ1X1swmT%h!}yyCW&+lyRRB|IIsd&@UV>C zmKQq#kkjhc7k^$rzzUYQCwI%$PotjR`%NfjcOPLt!~3pG^$X&V6A-ruS&N9<%lM-C z%~CErNv^K@i~ruHYUZqe_)mR@CoF4#m$8T?1PKb*BEf-6h-3m?#OCwpVyIYUM%NA& zxfm0W$6zxa;SJ%hAuXcsos{9f#-}Gys|2b_9~E@FI&@U|DTBVZFxeo3LtQ5G7r&+h z6{fxf2b3^`aCF>>9d!}L+R$xRzn_kI2eTWBl@{&yFtJVThCNPF457Q~u7^0JqIrgc zL2OhZCTjeDBW72;`fbz-bk@)Xi~p&S@oN4iV-;EwA6IrKbPJXd9Eb5J!EZZ_emAMa zrLMXt?RV_5SK5vf`PpmYQyxHnUW*jt|4hVx5U|crxXSfvtM)M3dl!TqvxM;acj#pc zPVNGoUEl!Vxm+D9Jb3*x!d%TrgAtqbJa z2`8l~!Jp&t|JBhU`nlG-z62k7Wnr&787j66!78jaJ42(blZ(9}xvsht;Y-_DFcaNG0zG5LO1Y1w7_(cz&_zRXi^bO9Xyvdji{ zCX`32%G4_vyRsiCL6SJ7m{7^{tf);7)Ax^u>NsM`U&QJ>ocxYvgX z<5pVi2Zi^c=yWX6G>Nm$l({x3S7(gz4t7ToqRXPR27;1k5E*Lq5O22GFrvB;@mx*y zG3h`}yFnVuuqdSm`|Gc6JZ4x841FHi-*5Mjt1nOL94JeAvyu3)*hqOP19XMdi+zt0 z1)-f*LvFoKS2b+3*^2#!f&wfW%zP~8XOb_c);oj8iOKTdHY;@-tXofQcvT!{f{pIrEZ%gBdbI=6nC+te84%GGGbT-NeBWE=qOP< zzOXPNa2laTv52^UD(Nxta=#|G3E^?@r6j@Hc0W??E=(EV*P$(n5jq{B7f$G1rIQ>A zc8m3;RF=@exFg*jR|bbQXp1$U4&HiqQO1&}Keik{8O@xSs0sV7I6%)|u5Ea(gJT)s z4ArM3XAed1I^dSIrz+?}#2lWtins4t$tfOgHljocw$%O!ou^b8QH~%LB#_gh8W6@9 zCzR2RYrDpdHMGH+5)nhGN)b=X*gXu+jaZEL9QuJSRY>` zIMn7xZ+2Gz_*EhelXt5fCZt)a#VQw=*YY`><{ogOGO>&s-^1QTfS9g|t$mQu1&4;! 
zsiB|0ahHoG(5{VDl)1@ZD2#9IXGo(Ud6R zn`uf0b_o)8;Ru%FL6P;S=xavIxSGAF3`H12KQ=r?I=e~!=F_!ix~lKZwi0FpWIqv% z8-@`f1+IVPZj^go@PJ;z`M!n#gGjxSy_vxVKkK1EsqS=K<;w>@>5UvvP?S=@Rgf$( zQe+7wx-y2~RKfAF8giqihJJ$^m>pt*&Bj?N6VrK`hFAP6AWf0iq7+Qj3{u9}B#mm; zUb4&MZqgO^`c3WE4?c=yn0vT}SRV$w37!QsP41M|+`|X5$0LA#P)#C+%mOoABVvY8 z<-w^0PP;EXEg6xmI!q}{s&~f+(I1p5~e{;g?HABs}@IIYZFHd=lq!I4&pT+8^tf&a6?|*4HCAw34^R-pNC6!p!=;*fYsJHGXIuiBdTXTOwKfZ}H zV>X$;ZblIIId9nE@s)>W!(6`ch!38j)3u~sr4Y$v{8rWU!g3i z_Nb(^dii;?2Nq(@k%rU1xMqW}1~FRV%yrdfCMmfkw?3pu7Gs@bXMP0MeiN{K+`#+l z_xuB0?BHeXdDLk7WNYiRVT@IwYxU=Vl`S&d)_|dLA`(Ror)sK*5KSK7+b8MZ~~(!x2k&>8*eX zZPuAM8nbTY&m?K0$Upc5dCL7BK;XRvYcn+bR?;7x*KWh=#+ItpniMvGNzakA%F^W2 zB?P@$op{~AP@0Zo=hfXTeZv^3A}9kZdTQSzy4wysEHJZPS^mJPcIby({?`vzh(1Gv z>?}t9+-m6p3o2)BgZSI}8jopY`=eekKe~i5dz056=S?g4Dnqq;8MlgItI?l_t*z`g zgfr(cKs2@BKiLg02W|X8?&czwTYk$tGlBGEPc%z*^ScM-^n(@@*RY>&(;mnv46}wZup~qiHRzG!cIlPf~k%iv{URFL?AZ_;U+W1(^Etp@%RN z*VSz+*qcta--n^1uOVaGEK14I^EQq6hHd0)!fy}G?cunlvD!-37iy5KV`4ZQqfAa|45M+<@EwU z4SuGjR?w5nqE)JxTFmOign{MWMTVnZBPqW4+{V^mM-?Kt=B`TZuflQuPv<#Q&rYmL z(M0#@fnt&CRCh$nuXLf0xNwiO;gdN7N}94@gQkX*5Xcp)q0b&to?J`lDZ`}k_C(vl z3Z$S*OtYQC5$l&56`e>zsWO|yuVSDk0s=fr+o}7QcZro=Q^MEXuqJWtd-0oWSibxu z5=tt;{xGb0j6f>a6x%{7qF~z5o}0X zdDf*>VsFlgkg|XeN5>-{L+$n)&<`!rDt;L|IQb!`C%5W_r@Oqn1c*B71A7ELB)rz$ z2U%~4tO0NcQyB^~p}3(XJ?5oCT#5cJyW_W!^%m;TL2S=Ga3YIx09uG-Rj_Khp?$sz zF>ept;S1lpRqI1%j!uY1`$8gbV=~6&ZK)}Zq&l@*WI=4Kc0$eB-?x{}!MTaq*K5Mt zNOh#aZ*-(r1=QrT{{P`)v)tYH4JHGnqgPA>bw{Xon}V~_$gcUQF50`G9w)|~{wA2d zV^iywf&tbXj?^@7o&X#nge2BFRYRB@M-!l4KW{^&W73 zjvYNZIOWl+*_5rl7)-abH1ktZ#>9hhuJKbdgZvtbA(cQQ1C{81P&-&PZbkIoy~5b* zev-|EU1@Wj0@Do2v(dxLZg|DHb3Et|-0CfRb+1U%s&>m*aJI6+>#&8*`<&X5P_&i% zWp(P5X&k~dbZ|+m0lkYQ3QHAFBR5_Lc^AB|JcR}R{`C8P z-s_laqU+RCaoIO=Z(!)&q>~LpAY_P9>q8WPq1kVQv-q*2*cn9an04(`%r>9TMw0ad zdy{~)%4I8Rp+Au%Q3jgWjDLXt6fjRZ@Zm5HV3f)G>Xh z<5=gIY`*MkZD`g?(4)LC!V-Me>HOFT- zMZbE$rsF2?W#jt15!>^h6bQ+CPx8TKgnk13(gGj51td-WQf!z0N9aExQ2b5{H=P@% z*m`vJXGI9km&@JA_Vmzf5d=~zO%LjXoT{$~k@(M=8?_s+nujB=8M8w>`&Hj2+l@-riIOYqkf_p@+zwbjoNA~s*P{;tKqoC*_?)!Y-ZE2Vm)lxQ8 zmtQ?rfZynHrmZn=yAKaF86>chDWjREwD(Ux2mF+E7dI_Ov(2*Z@%)2`&3YraVcByq z%(%oY`m3GSq;x)1mi{D#NkXe4kjcz0SBS!B>Rv$L$&377 zzFpzGfGSc(yW9wTHS?(+C@W>u{%%WH34(Chdo||cfKqK^<7H=pFD<@Lp@p<_e~oPJ zii4QDkym{7hR|=6m@1w%&jq4x0w3$HyLR`#dR$GXNCd+sF!Y6Nx$d;++uSD+N-wkQc`*%3;~{e(AGxNMQg=v`65d z=pqo@%@c#gph;-=(+m9TF4|AJB)hE^6CGIwEP?K{ZZy!?%DdW3PKtakF#Q)-_h|34 z@?NT1v9!bok;y@}H(8&!W^Fc2y(D-v6k|z?TsKQ-DN8#NUwF%XP5p#kAmkU(*2xU3 zwFBK1xZ7TB%_9OTiC+{Ippld}GiN*kgL4+!=vNJ_0xY`Pzv507qkFklyin~q+tHw$ zzql~nkg{CP-(8R?z%)`@Ml*z-4;lcOpPeM;*g9JvClM`@Av(t~N z`EQRii$}O8tdE1&MnTgE_%}?K9um(YY0p@sTbY`#}KD6lxJ40k)r?0lB(P7?bnBc@qgWn*R@hCIgI8-q zPe)H?A2}gZhPW_v0PdT^QC3!6F>?+w!rW;{ONzm~5NmQ!B*#zp7T4nX0qPZD-t_(? ztO~E^{PrPWWZaLB&sdQ9&{VupL!dSLtL`w?A|&BOeSi=4>J*UiX(Z4&zPxa+UD*Wz zmj;qtXD8YQUL6lCv0jCU)JkhYqMQRNc{IO9mg*mG7gv|j9sqO*r}U> z(a2$$n$F)TI;uvtj=>fwpg4v;1+e8L)}yBxKLde2>o!e4eW`eM;@Jb5u!hY4@E#Wu zs!D{@L00GM5XAT((nEk_AGLXpB}o)Q&*0mC=8d>L`zPz3?~bb>Vwc_#r5kqvy~k8C z=X)3G_Sn`I2ACZ3o>~Jt@5*R=(c6;P-n{-O%T5QcGcnC_G7<-pUcR<7mAx`1GU}K= zAeJSvEc6gFi|0;IvD?-2dB@?dWkxXMpPKY2UTJHxPb+w9(x(&L4FI`gKAw|*{J$tF zGEb1e@QDPmJ2Kru_gOzMSUiXSH{H)bV1a9|GLH!o`jgM*(+o$GC*to8Q35+<3FPcJ zovye6SnlER1lRI>3FyZ;X8h%h24rHuo+IA?RQ0$jU+$ey34b5~#z&<3Ir!pCWMJ7W zIuec*}x1L~UV3F#%oXQ9W=<2{XD&sCS2yiit_TTC@o z2xJx~r5luajoX$(hP4sSJ3y?@=W6I(*<_#58`u5W*Bp1tFGq>}Q9&)c_NfHvIi_-a z0C;b`<)_QttE0l{V;$I6WE_Sunv}HJQF$Q0F@L|Yt$@}iDf4#APHSr0>4{_ z@w+?i_iZ!g->UJejswi&Es?(X^9#e7Y6t<>U4vobGtog-cHW3s#ITc#G%JL(rWNh? 
z8ngXh>-33Ovv$+g$sizE%_mYo>1*jsq&-u_wQW{Z~Q^p zT$#l4wDdWHoX5{W%5{kNxo{4Q8LRVD5l09B<2F@N1N?7PN8dIZYl-FmB#PIrb1F6X z|5-15jLi9L?8NoH@I;15K}E(EqE z$`QuJ-p4r)7i3_k%A8Zeh9-YF(%>e&Qx$4QyPGjW&=tB&*ZXKcf;K-SPT4?j@WR$s{5eKVE(2Uz7oah+Ca_nFnfU*Z^_E?6aKX|jGPt`9?(V@ET!TX(1cwB7cXxsZ z_uv|WyIXK~26q|U;qsjKJ$K!;ZhzRnpjY?q>gp=9KA3%6sv-(Isp&hD4tNsYMn@q= zhF)ui13E%N1AuRIYR@17S%4^Bu!0g3b>&bnL?WNN6mFLcq5)77cOmEUAjPx@-VA5j zw~$Bq5!56o{~3+Og|x7&%sNP1AGr`_krDbE_lU5(ogSuUDe33DR?SG`IN$pq5sWo* znW2)+t#CfGQ9;7otPq7Fsh;H@6}B+{cI&rrwZ7&OkE4r3`cDNuOMI}{-?TYcaqfhK z_LbXUeH)N9|KZ`}4U|0NRQBO&l(!@1>sTT8DH_c$2t()PIc(rTv1ug=FuD*k?Qz}A zpQy$vvO!`Q+tlma)T8o+c~GNM+3$1q52TkDmgYB=IUQQX)I_s#5!@9r;MkDa0kUm( z>nq%)hf>=j>e>&J8~DZ9pHCsRm=SL=57{xneDY$Rm^#yX^iYVvU-Xye0t7BmxUk0Z z3BzZ%0GSoNzOQYFMwcr9Etr<7fb7|?T@CVT#U2~!D8ZB|E}5SaI!o95#4>mKt-)e7 zat|gEL?|IEgdQ<+h5`j+md?^-7=P9!CQL%Jf|qxHL}qW(DD)8);z(PQippMC?b|nt za1V7TN6h9~pdI@|7{UA2$?>m-|C58?5|~sl+_2XZrYO=V{&=5MRg9$L&}N)O4bRRS z1dMY+#~ao4U$mME-XS!YkbG9E^Z1Y^|g0)8(3SUVi`}+Po+qH=s`h$vAt-hZ% zIHqR>Mb`dU2uJ)^wA30;!@?l02T)t@C-FLQ2+$)<-qyHfqp}HFzO_Po`qPQM$eYij zw;c8J)53T4_+(R^b~B>6;4)5)psc0^A&JWX{iJv0+eyZA|(Et9St4n=D z4MmQ~u*Dlpd`f6eaQn=-r>CpH!7h;ya;dT_eKIgFK_RiM%S}g{S#W{4A$Vg2jS{Z{ zH)h`2N##yXxkpc@PrH0OKd)b|p*5iU1qxmHhd%`kBd2QbE4{wuV6cdjf1=xfD~zZf z&yZ4kSl7?wo7wfu`ZbIWyUeqgRSb=;bX{Gfb#~o#nNKCFQ#QofD&eG_ANmL!JVyJq zGTj4wa-dt9)?khX)3({78|zzL-c0Xbr+y~g)>*dd1-hd88LL>5#NO98dBgRdtQ8QE z?w@eGd^c-?cPf4y6u*Lpyp@5Tn-PY|kE2uvELSyca^Hq4ybKUlo(E#$uJ)_%KZ~8~ z?Vj`Abx!?;-B82pLh-uk?aB?l`v0)?KREUOMTYyj%Yo#bZvdz*=ZDjKL=ehH3J%S( zg5J@5?lr(+B$X7`GdjeQ(!2;1z_;13lw02K0tM?19CbSEy_Ge_^EuDmbH!*5;CWE68$B

k*)R~?8yr~%^uhs^ zt57x+>vM>g$VJZ+(3o;&6F>=8WEg9Lm;oi~go(z$@f7yJFKEo}8@3MM2 z1{0xh%E0D%pFoeQW)vBtpqH{;L8pMcKbQD@t=nl3z6d9X)qy5p?}ai$ac~U7o-97O zT{}nt+7shSE6G+({SxW)GNZvkOyjBf!>qvcu4O`cYTVDpZADdjIR02~ow(feSK&q& znN&{CWkS*qvMQw&*@0D}u%c*|fk5#VKKY83BEtTLB6!SVzf-qF8&UV>oH*zzF$Y3% zfPr#Nu7+lHGK_bJhD`Em zgOqLxXqQ=i8qN|woCtO{&P;R+4PhrQ`Aiu#yul5{5xV=^w5_h{YbKLSRFPPKu1 zgg~9yiN8fJb-#_K&eKLZEU^w_sZqEIH|MDxWa<(gf60BhJJ6*TlRf+5VV|9Iq z18ISpx6mIZ75&673~!3N!nCvQjz4~joW6T8*GCFE4GN;8pt< zQelq+*t@3J*d_9ura&(>wz@u7b8-vhg1%^Hta_?8&w!Z4VI68rt_t&TXNX zuX0}QtPCEQhQ2TDbob@e+L2av?cqnXB5CemCwBv=Ygx;Ef8){YszWPO6#n~;?#bj? zy2b(oOo*d6+|ptHg(s8?#Y7BFI1(6lelxheCfySxVH zvkc#f{X#0Q_ZiCQmui4*rOBrURG)0j2S)Q6M>jtvJO?!oaKaYC@9RY+ES7-#Op#7Y z?HJg!$BUP<{@QKc`lL4h7?T?KL|78Q5i;$E?RnrnZlR3M=L2@E!-|5nW~RMv!*cZu zoRwm8=W1;K^W|qbHw=ZpXbzpp-^csrv(yPtrgOY7$djp1b5k zU|G!3b4Hd43(HGf7veZG?N=2V&^60WaYQWtTjAYL6H;D9>48o4uj{eF@yhk*TJytD zF%bn3l-bN^q(;ol94Ck3<;K5z@x8Oj#=Z-zn-s!NiW7W}@B*KAK4rlhA6{T}IRzzlH8B#{!!p|=kWHwb82vU9$d-$<5DQ( zCx-ugtK5dq#H!J<-RRa;%?o06>mj|v^UtmlI6R7}my-_}#_CYe;lpG1y(p?=bok~b z=$$+0+5GMG=!T@)dABaE_#(KN`EM=wW%g;n-L>>x^18oo@-UpNZ- z#i}FAyZ_4tpdL8txS|xh;*d)4zB0_gTr8%;fDhpu|EjBg8dV!N>l~9FTv3Lkwxf`? zP?_};S|aC44i5gWUsJ1owRJ&hCuZl6m{nFo(9!(yG8IPcj^pO+J1)y$vfCcErsC@l zUzV|0ah>>j=ao)Av@}7Qj9Jd~<}z|rWlzqsa~}^A@%T^|F}nbso}gQ_Ln{7bm$Iba zI5<-ypGsMY1LwUj6va)s$%@Od;UXL4F}|qN!cMw~=AB!8O#|tBc&zNHBc4994$ne3 z>UMX%CQy*xkfCOLlnDm8Z&$Sw{U#@rFs?FcKfxB;WHUo;nQ@ii#TnwZ)a`O3uEhqZ zVaQRiF_VwY6h)oow-xHrZcd32VCMXu>!BA7T)^wEFTTH3;|al~eqGV`X2@7Yb!FU3 zUszZV_m{bARx)(7qbb5}`EW6f=l)Rp;mvygh0H=s6qMfsT@E`pb}n7@h5K90jzESw zakP31>a7=P`SPd4F!b5>d=$jRA(s@y)M1g5tD%CIWLqDYT~WG`r7PZmcFh<82D1s~ z^S(K4t^?xdS#f%7!E{Kpj@lF7tn#tA@N8U~AMVIDu2m#xA$C)g`wV}jNJ|Es8{rNxVq1sb?es0V@TUUnv zbIH$B(Fw_!T)M9j*|~+REp`NKm?GCAT(pCG$wQ=|p~h>b%0k)5r@`&lcmRJX75`*i$!(}C<}ObuWA!gieJ%!87|5{c+sJl2_j02%=#PL0r0n~yAAZl9hQ?O z&zMn1bHnAlQ(Y#!9}qC%B8p$!v-rIPFx|I;kWP`a#aweHG_l+P6xah_BCgIB!cZ1V zBNnA*R26dqWvo~X_p$+!){O~gvzR$5o~eiQy|3H`!?i~Q$Lz*u3FZzz!i#LAUuMgnYkFcLx{c*Qs1pkBuViN@G`z`#DWtB)6nlrn79cn@;ELfiekTv%=@o1S5rJI)Wf<@i37J?r+0j;()^Yq zsK*LL0T&Vw@!&RPM>|z7DYpS0%$v_j5^c)?P7TQOR%ripaAIaO7xI%yQv=2HxwY#= zsX59PtTe9?mzh!|Z`aIrAkk z4&3|E*>E?U1d;lz*FePu(bQzbohlxG_@egQ_SF$GK)>9{UY>VvoM%ZM?j)oJ`X+@0 zi_ns0H1g^vdTKtt0B05dD}xfWuLu1GfP)hu7I~ic|MeVG1_nSOX(DETj59>vg@HG_ zlEqcQya60=z~d6|15*3ZY172lcQ*z{On!2yx;W!&^Y6j_iG%W4Fr{+pLZ0BS^nv8f znO^|?{snEaMzOH&dsb*)$Q>|k+AqjpovAd2pka$}G1-1~Yrd%^E3^J(oDd$SeEzN& z>@pClrQ{2IqEn9SD=fQr=?Ilz?7{9!dBL?tH48X7vtYuqe@}JF%fLqpK>=)26vN^b zo5MJ(8z_YGmuyLv1sZgt!++mY;gh!k6~^6sUHdRuQD|k*Yx+Rw1KL*YBuT7mKWx?j zWkUQLudX_c&t$l#Y=2me>x==MaV((_{ntW02>EIA{)F#2^t;c4_S8gWWibI1B=bHm z?gvLRGorXXmeU?Z&DCItaU9c_IYJC@YNSacYru2(+l?Kj!~D6R{Pap{T79s*o6^wc z%q)^iFE3V5!6keka*qt_ea1#cqag+xu|?0dn1GH^+xNh7O6Eg5iBi?b&nuq*0=6z^ zdCnL$M?9JCW$7ohAyT)*MegCb z-|O@^rOs<3*_w*Am?CH)r2Q;F3z$)%D1B)4@g9S z-`!nqyS#DU5j|WBMtpQ1uwAbuIN3?NFuBM%hNal>LV)c=se=F5uh}5~zr(X~DC7Xt zWp4G>eq68J#|*wcemwh+I}@~EQwEiK(fQF|Q*z(w_!aim++Xh|)(#8~OuFb#?V(iz z+_+IRlBSs@k>tsD5eS`Y`CNtzforBsabL<{nfw$4uBrTW_yQ|qCZIhP^a29_1U;#F z%$>4_szgL#E+#=iT}(oy%Skvl;8Zmp#c^3euu&mj zDmdD`T-`uxA0U}y@X7rb^-yWyi3GlWY}8LtE^k*%z2xLXGP4tAYB2NN(NET^a_B-= zyq(taEiFUnBnMermtPX$*)ZIF_`>}SRn`13X|=LMphe$r4dC`$9ueur2^XRZa4oXN zK2J~*T9C{(gg8$YY0k!nja<-MxJlSye1YU7Gi4VC>|3h@X-DIFu+3=_8go zeBu<@H>LVxS}kJ39X0S?8d5jqlOkxa1aHg{CURvpo!&vo9|7HReK3B~`$`RVw(qQC zMm7GHN%^bGtW4$@x;%)$baub0r!>O!4#H4{smW} zn(qw7*%wCN{QsL!CAn{tDhvV6Bc22rMnC2^Dz;FXd|WFUUw6NWqJt+JuR{adW(^?9B&Up4 zjolkItNA$|FKUnjb`K`uJ=&o?^)Ude-Xr9LcysvnAI-(9UbpqfvX49E@a!4itLXjL 
zW6$woA^%s|rROQ%MvXW(y{*|hY?e`x5&ME&4%YhOUD=O|s_tR{^AVq4{95&Si>>;5 zf)~j$cfPRKcJ?f4ddXfH!ReB!>e)rO76K6Fe%(Vvvm4+$a*LDRdxLX^|G64mb%aq{L`C{BKC3pepvq~vfePp? z*&k0-*KLwo1FX-e- zuPa}4oU?b`G$d30K9cRNvCVvDTmR-q;q{N7k2+|h>=TD*L3&YOxu5ZI6LK&4vLRMq zbH3I;A?05nDO%@vKV=m2k!PN&+@NZp6yG-nzURl?^l&7v9@?@;EhuxLH2e(M!$ay= z-_NUECpQ>pd~F#I>% zHp(FHb@y~rDQ4eWHoK^|*EsDbOuvt(^J@PQ+1 ze2x5Mmx#(_C{|>B`}ywv-Tv{iiPhmJ7Z+CwwkpHJhISY1J=lf(^SO}25*iE0yh<<9`Vm#L}Oa`U|cdL@uaR)LosN- zev^}!ErUhQMO$YlR6x6S$U~f~f-MF@cLOQ5W1zA8=hT9ORJsJj}lskNq{^PnQ$a1M^;j!T5 zPg=5|z={cb_Lnk@_yLl5PI;Qn965nNg|grfPZweHZpQJN-i@_`G}&{@(WOX2;39m) zwNxlYM2PoT6<+k82HHOl)-W+_;m$rxQ*35=qQM~e4pAv%Mm%2AS5xo)JDX_Pc2P?P z9?KudyxIY&Ymy#9S!FDh1T;{gKQ>MTD=FtaEaT9nI!lmyIbFUpjRp}B5y_(B`ib6;zE1nCb=BL5lp}4wjL-uynYpQ`?DDS z8XA6nr3TR<1qUbWcSy2@WB)}EH& ze&*;|L1B2ygWCC;d?`OFsMO5Xi@6>RheKm>DSEgu{UWl9Zk1DkpMZLutz(CI7A{s> zkx}j-ie;mMU2wR;izGw?i6E3JtTMmKjzyx3j?<7Mg_KD6Nm4b(38*Yo2eTg7m&B{C z+ok4|FeuufqubL`#nTUIvhMof>L#!8p?~WXz;nWO4)mgc?|_jOgwIA6vlG(;N^Nh8 z!K_wTQ-y(ru%>Fh9{!{pF4Tu7&W|{TgFX;DN{?_KX)jV?y_cog`~9{Pv1FOX4dRaYF0fAqQg41h}3Hw1pltf7CEt;kdj4M&&km?Hegf5!^lgX-) zoH9M7M9a^UFJGCE8{sG-kQ08PvC3)>jt1LOK^@Rh2j z0@#pA71(+ez9D6N0Qm)%HxSM8W@cvAPMG&_eX;RWm%_ZiWB4$CupS>@>#*cC^3`EH zwSeS7uCHy&Uv^Ul-4*-&<-i}wt^Mq3;esDtx$vz<1gB5DsRtw8z4f_Wx|ZoOv(-Pw zW0yO+A9{`pt~RlcEv+B~dw~2RpDQ8EJv~WG{KM)`qIdf8Z_q4TJK#&rD&pk14=S1W zmGM%CKi^F>dgLdC_&b!1ak3+s*QSWJ100#ADx}DJC^$TqcK}A62u3GU4<+YudMpng zAXLueKqVJ|>GqC^rCXY({%2J_hx9r-L_$wlE+A>PnRwIIt|LYJE=XN~~XEfv|$oGjvgiUTTds4PWe9y|`_R zVSdX=$M{P%545X++Xjxrae?(8Hd{+F^f|JZwdsJvh}oyHGRp{8;LGv|KZO#U1Q(xh znPHE-0T~o`$r0g9yWx{MXUTjgWk+s($IHH}-HL0O3H5nJ^Pelhm}~hg9q|)uXJKS< z!A>O}1#9sj=aC@vQs?Mg6;l7DYFsk)Pl%hkKWP)MC3#u(3@vgNVPRBfP9qtLxjb<{ zw7dU#b8vuXYw;FiG*6Y_S{_718AGFzZf8GXv8eU9*9rt^s`_y@Ki;*moUZzKthQ~S zbX_H2bR1q0AiJ;oBXk(n?)-zR9_gh<5&I~IL`2~O7`~^nTh3iKvJk%9w-d=kHon&r zcln5~5qhXN%=9_$g)wll`2HWg-T?LOQssT4pWNjQ^^J7pw!aQVttv)x6VKic@0BkY z=Gne;oQUBx&zW#?2-5-d-1<)B*ZN3uxsmHj{<8ZBLiWE$@Q1Zl)?LJY$D~)EC@F~8 zwZaV37!jt0+cToNI#dyTqXV!6^}}L&Z2KlMSZk&b)RtIa>8jzYV#&e5;RNi8GJnbe zSZ6K{uABEt3Xz8d&n7+aaEJ(@O`iGU|0pTW=R529Jsi}wF8_eEFP3LDtn8H*u1D>Z z6k&lnvL9hq{6hK~G%88SP{xNpUvdzYzZL%wmM6uSSmx`5xMlZLek@ipJBN%@NsYBp zW9-(5(qWLAEg2{|lpqHW+(#Rr`KO{;7gI#J$Pt3m@Owv;X;i_9VBR*-Gk|d61@w^h za147u%;ebgK83jlpKkY$|03(!XnatKrF6cbS={8xolk54-unl-c?rN9C{zZ;vx$g! zd6?fjh;^HNoP11S@w14-11onCn^^_;ph|x&_$lX2vnj&)kyHgm`ATHKAeIVv-w}5E zS1!Jop(Vwc28~GOJqi&wbc0_;D{Xqk1Bq3eZ;~_E`LETn)*s?v6AQ;-QQNj(K`9mo zqk`dzUx^NI(Vg{#rtreKJL2&X^MZm9%b0qhx?H4^D86e4Abn${SDSLGr;CL%9NJmc zlM<8-ijwLHxo6OkrevhqTjf6P5^oD%Po>ETi~x<7v_5vop3CZKdo9Fi5uIj8IKd1w zV&|scNq%)cszC`KRXzfL%$dffC%wTrr4C;(@^g`lYoCo3yrJ?8!vENO!#c+aY(JnzCRGedag5E#Gmr`y?U#}koL&JGW+i~ z9f3Y1(nLdjEH^lTQd$si#x6?ESy8&&Z$rq)13?#YXDjCS55u`{RP9d)25k-)=?Mw7 zG=YyhxR!*f16?nVAkr&cm)w`zix*k}b&kR#`?p1Kkwf9pMZ_fdQTj<_rwDdklo0+) z-)+9o{g~~yO;U0%0^gyoRqzYDVesVpKk}qM+zs~FZ{zf$fZVr1e;VzYyZg6;yS@_t zrzT715d^=pU>CHoC!xKyS^U%GSl;q{GropBSw|{(+FoeHGKQ8P$-~)o)aehx${Wg- zW{IRX3a4u4L^LiasfdX3SjurDz7j0DWuMnlW4C4owQB-x$D7{Sgm_Ne*zZhs0*Vyi4dZdMD*7vuzSjBOzdbJ4(C)niKr&@!seZ|E6Ue#fW=pn<%PKND@RLZY4jTQV`CxBU7Am@;AjEIZ=>wT zjqykKn<&l+u^LtDfv5lY(F+JURK=4pmyCz3NxY66V&ZRhf{@HX0BiNy~O_Kz$ z;0LSMC%z8McrGEs(if1V;!BQLIyOXZik%GSW!gK4EZlPc3Mxfb4z9R-IxU*Oy;JX< zD%0Hsw)yXWs}NsRk#0}Po430@JR-rgv*{eVn{|sy66_C4vfdY+r$}sD1}Vj|ZeXy5HNghGA4?K4Q=~FeIim;O#icFaZ4U z47Ma4nsT7GUH;sQi7X|HBZo-oSx7T&{@w&Gfw-huJo%D8*Z%gp*dZdsAfK*yz0P{v zhCJPOa{kXBmmS$sIo;c?c~v6H(t-nz7(UQH?(NVtH#GOPFE@O#{iFdm=Z3`p*;U8? 
zUzk9?Lfyml)%L;rsrF{7|E6iZBOtJMJk{?VH0#e?_P=))&o^MlK<0IXr_Dui@A@uC4w_RWNIi1Z1H#YG) z+zFb|a=gVqB4_A88fD<0acL(03ir1@aTj^sE+rd1j061_WI}^ZREaXPzs>$xVfQfj zC#rz!zoX z_+%K0PfGq^kHMd8OnJ|{)qg6(0)%)dXH#BdzOo76B1mI*RvI0su}&5*@?5WZV%|s9 z#YZu zcF`)zkF^XT!1d<BD^6z+TyXvp? zFWmty?JwOOt~~xb==$tmcX`3rITh)8l0n(Bptxudkycto_chq`c2Ff<8edW z74CLghuzz+*HaKtN}a!-{j^ajrtB1oyAQ?0se&fxo+Pyn{Z%vW(N+vVYu@6xAOD~r zIOAwPq{E>w!JYC&lAX7PtDCXJCNR6=A~aYdF8D|M)Qn3L(5OvK@rS?UsVX7QTGa*Zh3q&L60%&Db$DojLDx|J zO^nVRhcFL-+LW4uM-%Jun;VQketPr%Uw(NZ6tN=ghbsp)k!l{rVhkmZns~9FUEn~&rMEU?Xz8lvYfB2uuq0i|X{=rIYxZdx zQ?WPMszHR=^|ET_*mJ($ zlloANsDe20=PTQEt<3!I%h<`mojE*~RzRkP+JpR1{ld^W^(~tef4oTI7*xjuY>cu4t#0He&2l+pyEkr^ zly{@*CitvH7VpPB1e8{x=Or>O9%=qKgK3> zTMYSD0jt8?af;Kfei*eB{FLJqM-vdp$9`1p z{o{35gnKPt|BhifnaN2Uv$utok29Z9iuXt@=e>=#6erIN2KOQuK%b(!MNPoMhIz2Q zX7u9+{*N~Ow6oj%)f(1%JY<73obW4iOsosWe8B}Aq^0Vi-8-LZoC1Q2)$EMGqvFbh zX-Yz}<8Ha@&yv|u{k;1SQupd~DxcE8>v=o4esw|t%>+eE{+?_;52>`n(nW7%_mznmJHo-laAN^7yl^^eh zn2dnR;jlJ@-!58t2>Q1d;1KoK8i6a-jHe?M<1$&{t-~61WSsGo+#e_$VDseH&!!35 zdSWXZhX#x~N(H>V($f^jjn_yWx@>72g-$FOMMmHc17k=l&J$x17$4Br&0fw;^E-F| z?tn!z-*g}~1PlFKX9-T+u1cA)Dr#@{>NKo2DSLln>rUC(IbBg^p;aOz<*<0>RwG-u zXJ33vbLnHbqPwlagti5glKrkV1nxv+?W9*+HAxrGRVcOD_O!<+8>O4s#nvUzmal z@n(RvyG3?tkDGfn`0Lo-tk3zKzy(Ii+Qn_;&A{Y$tsV%3BPQmj_*bKdCd$$bg4WQx zvsHv7joEP#esgpIUye$3cYH6hQ4+hHqOi`3ZjLB1rLW2()5Z44l(q6u$j7aY(`8Vc zYpGGj#?1jrCAsL<3--OS+}AJyty4W8C5LO{pZW6=LnsMmKuB%u29$Xg?;`E`UX3rI z`yE2=iy*svMK1=}y_&jPdNKWHJ`bf&Cc~#`2tNW!N6aA%a*Dn50X#BJC__bl7~$V3 zI=pywDX)Nt_D}S%;kcUWIX_e)CK3c(gP^xy3-W%$K#Ad!?v@H9RsvD&7oBlim1fe8 zkdq?9N5U7*Lb1u&A54sj>e7g9KdkoXBXZshu z!qgE0U-m4sT11AK3(wrAsF0BOXEX{O{oqqphQXEXA1@%R6-kHxA~ zlHnlg+gkUF@*zaRoO9n(a5^7QlU#$Q3N(RC~7OAWAb$q*tz#9Uad+Q4l^!3koh$alt$lZ#Jg|U@! zOKQQbFSEIXh2`q&g(pOgI~ol6dO%^G-iY2%n74cprKI2%LlnNmdv%eZ28R_3@Q zO0K-)1nS`$7Z8T9s>fpVjFi~t>q#=b5#Aec5E&v0!~%p!KW(wtj>=6mK22KMrAgp1 zjym>p{SH}6^vaaZ3l*QykW?)K?_gT4i_+dHc@sM*jFsJ7A|Q8_7Wzb(3H=-mdJfMv zWwGMN@(hlU+HZBr>O&b~$nf82Tu9NYlHKgfuewoW}z7orUvdG_?zc5A1-m=C;G?aPpIH)_#?}gRoKG3rp`q3CLqq z2;A7sJ2uSUd0Cz7hk+z^2=F@2&xY7nKTWB%V1%WR=zmH<`aesHQ~)j>=2uOfmnl6Z z!gF3pxSw)s#Ow+MDB*y=Kz1#0s?huWqkP!WJuRB$T@w* z6H$<09p=bW-X*^|Nl#~2>tsPP(%Agjck0_Wi@f+0&eXzP^C^f%=$=T_Ju+R!ylY%_ zFX>go=^MK~(tv~XVr)!|T1NtKaQ&U(>9ex3#NyWYBRmL*u6Jb6FyQg4* zjG4dt8(Yo6ts3f!IQy5zCFIfVa}CJ{QPI=J}8be;*aGExRkSY@i)U!|zh zx)Fl2lYCH3+^1_U)56dT8=^#|^R;i_aY76D8LpS2J_Duc0EZOXS~Ka)3JRFo8H|*t z4*7a~t}fSkMpV@~I`(!HZtpab69Ya1dLHp_J-5F@uovUX&9LVOZu!t)Q(3;VPSe|t z^6+8u`p(aNAI1_oxe3ZUvD)PA4#Bt7uqIn<)%cbbeZnD>qJZ7p45W|ls7tTD*UBGj zcygXdIwU=7cR{4pjbnK(i>kTLIHA!k54tdMWWE9y#^C<{KvhK;1PowEh3Hc>B-Q^Z zRA5LIx0hghNb~`^`B>oaVLRECW<<105#;r; zEmgA^@(xke&il%br|kUbBEQ<4@O^g`%--kizpb&x`Ubpt5;?qUohspQ@;$uux|uLe z1BQlGlw*gLQ|;rbJLwGX<0mkaQ&FzXVLjLF7_H0G_bojDd`mF>F=K3$khGjJLgmn5 z>oe^A(xC8!(lbSuYsvGZV}nD}k)c>k`0tXE_>F9xz=~k{>f9b==Es;x28{2fhy3sp z*8)`RZ#zq`lp0z4?fa#FMa7zOb`LqBO9y(LTR3Slq9a?3{@Ct>TBeTDlQ>^CP3@n} z`kNKZv^!ktp~DKKJ=2qxalw4U;KKC#3>xiFoun9wi`etU@^k3iX`jv#~&Tz9cSZa)2^K7TPLd)dNU}qSXe#S$3^Tn5|G!#H5mj?=$l^M5Axypet(UbMHRw>Jlk%ja-Bv%l%vC z`R_>vL9~?2g#W^kLD+*tD4@~6X#r-DX6Mo2!d>8f@-fx0xtsSZPz)QVphfyhnZIaO zAUe);B>_>)7%K;JPKYKyxSxJgbJMbm0oofv zC(Kxzs@MBx$%Mr?=I76z{wO>LqbDM#WsjF!NvhP`Kbl1d6pk2&cDHhk>74)|Q%xSG z(s+eH1#+^eq#pEXXFfJEot>iN9qxB&6SM}co)L|Qx?SAMhLEuG{6*&}Ry9PXOfF`` zK2oaaUx#eNwfSvl0)!-Q1~c4_hYYrqRDG;01Ui1`G!I%J!V3A*z$z2lh1%Iu&65x=BAhDlwJbhF5hLJ2j`ZCIVL5wF2sJC8 zZIEYKmDMy#qKnry5(ZN+fFF?6)F6AHqq=Cyotu4>w@*WbZYbxrvke0I*q} zG3ftfzkzD+|Gc_|;15CADzB&xs;KGAsD50j(l(=4l^KlgX74&NO<{+=>>R(KT8tBP 
znu2p#C_0GeD17kuyQtaxZ_!ody0E)I&>gzQtw%I(tidC@G&D{#WG9=_cLEf5fiYWde|>wMt%(lbyx3^i>0%} zqC1SYvAHP6;Eb7DBM!uFxqS9HsPAfYzi{root{=(@)^W$z9=)Nal!3Erl;YD8XHo{ z?8Y^D;K2_`Yo);M&BW|vSyZE@!rSJAsH~7+T{FZsKCV1YOPhzZN z)=pOeMnzt%AI=Cy6&lY<=LWEnFmlW2U75utb*1&J*yr^a62m(bnhjEUN7x=&1;G_x z9gP0!(R-sr1jk234Y3a&(Q7P~?MOUHN*iD%Xgfctf=$s&XBwmoWqAy)%0TI03HB#zbh0&sXEJYiWf5EE8q4FD1oH*~ z5pumb!&3x7kq)MQ9}riG+!_Gg6CXJTp-?)o8s~7|I04Zt2jQ#iXnDG1b$yFO0p`$7 zJ@4`&?@b`VL6lTpL5BfatZQc2h+a~zgFaK3Lbiu?8&y zHi5KEn3tZ%3KmbeRuC=d5*q;&L2Bv+bWTIL6|TSD%RTz4(IHM_jE4(L3 zoUGfi&X-;vR<+pKjH3EHb2@N5s0K$mH;h{C0aPg}687y1>tf3L9ilb+#^tnegnQW6 zT#J-)gtF)(=GH1)w7(Z{4t!PwEv?m6TsU$wqqi2}Gne`e7-jnnvhMN1s_TB`To`*s z)}xC*OLa{y`cKaj_$yeE=*m)v&^EVp74_Qscv%+U=s`rvE#MENQ@daXQ)`NA2s)2# z9Zp=|KbmrFg#@&@tv_j{PF+H$J^!N(A(1{hLiM}THJGKt^h;dKW5Rz4g)(*$Ou5c+ zN+ReTi+hOOx}RZvxYE@5ObSVH?>sxy;-db)VX6kxm-?g0Vo*KwKA~$idyZ~||F)Rl zDMtsL1PgYI*GOpZcSXQDm5(2Bx~%i(G!n57(NO0E8&sGyr8vCn(sGr2Xe9~O6tb1J+QOpBnRfhhpwA0rX;SD-qPPjg~2@$~va zQ&QXME3bM%dzu<0bjWeYcW^hY#=(hBq9PjnH1^DstSvFz7-8C?f1m#E{=leQKIqP+euB6Qf>=?O*o6LG572WcM9k+ia9%ixK; zmHGW)Q&9swvz(zIje(v)8LZ4vJx(K--xIdxm9JPUtmxjl-kq5{fQQcS;c#=_7wt$3 z?dQ*5W7Lbm$LBW9z-nYlbZj{l`b*i~-&-Lph3h(HQwU=MU&hBvQm~OpdDIAeH|u3Sc$1(g9Gr_JUKS z5q6NwbbhE)2pp*h6Y|@MeT_5bh}VT2>Y}I?OGqi`fJ7|%;vuqr5j}#QTn*}1eF@4r z72u5m<*RXypnz`dl~Z;%-<6V=HYL5=%%+26DHPXGng_itOQ^tZ53$^LvNG%HX=V43 zV-LrAyeGktvJE-)ogzD$^fv$I+4e3BhVV|-k?m<{kIC9OkC;G|xI1F4-3l3rK~u~` z`DZ`w>9&Mr=^_-sgTywKB)PCV)9CQ;3N{+rLx0_H-UjGGzKdZYaqqECNeUil=~m}e zt-m*I8snrz_S`ZTfO7jhy$1Zyx|Zb}{9yO^Z1D6!rdRH`(y5dK?=!Lu#MF^atju@+ z0p%(6w+f>#JCiP-#(gB`>-d`)a-1#WNOHV5#@&iXG79Chxa$jMHRk~h#eMlZu3=fb zJtp*|w2x~!9XbRYQw-a{S@Je31^2h9#q9yYp9Pd7^2~)?;R$S2><=rAc^#T-fA{Wd z!aE0?a;iVMq>s?!`OfeOKvY>xYlKAbWi-v#8`Ad4vcl=^7t?Lh)`$vr>2BNVA1Gve z1(`-EX{(wP5R9I3FA8fN#(Y|K9s`3$UfK`bmpr>b-a&B;{NI;Qu$N!oc|BhETK5m| zj6L;x21W!JG3TGV0x$NYK^{<$MfV?U<+{s$2_1W(V9x6Qb%`thP4QEeA19wL)jakGQN)?@I^x0O4c zZam|c-A@~>AE>@;M6j6AIobX)Pl-YRsYX5Z@;{(&l@$T;;(|?gE~s3`a)Rj+&?!r# zq_BNnsN8(Vd_VU(l_E?1!GT)GGylZgj!8MYyVv)*X#-Hv`s;l~+`Hc=7TwJqDGZO*m4$nhtN%Zb3nV3$GyEF0Dl*+4atRfHB_~$u}eu#`i;l^HH zF!&vTr2JKDZi+{Vrgymp*VW4T_@tj7aGic#gn=hZ zK}KvzQ4szmYMJp(uSK%Nm1>tK=*t%DxXMO{YLvYTA7G%aSqP}(J8hxU|a!3mo&s!7+q;aX@G;7DhgdA|m zC9eaDmfs5da_Xjf%_4`u%Fv9G+?k?3BB$k~aEgrfriJ`l zU)o+39`aP`{S_u_LI_z|_~m$)gw4-l*6AdmZck|lIW;TXvo0ZE$N(Kz87smR%lbRm zE)%ll?d8z!@p^?7c(bl4t9hj379FY~x_I9862pE8r)2S%9@W8&2I0Z^xq@eR>LWJ* z3+c-XV@YRjkET7 z$lPjY=_y7*?|c+o0|hrE=*!tlhSB^9>=QilcI#niEcisY-4~D zfi*p-TD?xJJXChDao>tykGjdEo@j#%=3U*~pf`48C7Ty6y1QWm5t6kOs#$5H!eU$& zAmHQRd(k;v9^6aAgn3^`H+Ge&kyCjzIVacUidw|(LhCcv#@{%8XupV26L_c6Lmw{+ zduQ;*iO%{;bK3TX>hDW#=$3d?fTKzuI=ZlD?YJwX>xuT7D4i@?N#rJic9s(1xgR zU%6>%EzxRAKf*R_E(K=A0du8IB_ts$HiQJ4@GP-c(I~~CbIiPrf~bMQBm^3qAD{J! 
z;Fm;yM2p?zbJ}|-EgvZf8b$&o4Gw`LuhgHXU?+S(k7ywYI#SSR^hfV_k*ng(i zadi7NM7~I@<>_geZSL(r-+Tj@q5O9fo>6lQz!R!lKlL?dc?a0*uYncrgAQ%(EH87P z)C4I3qVEIPW?%d>ssJPNMe`_~o-uRNlfG;*_~Qy!i2x0XnhLl1Hsa3Gn7Q4Nv0ZCI z1Mk4fC?G(Fk}>$7ua!q+JwZ#7=Bu=EGvaxh6<19F8tov^11Hh*kBovR#pAY4^}jc2 z%)LHxc_vzatfJO5`-lC3w`Bak|M0UIcLEY1xYFysJzeri5%|BD!+&?wzc-WN{I^+u zD>jk;*--wL$;bZ%rhhBN1ph*RcsZC0&U^m7_*W7w{{>}#EB{Y=^8foyGN*flSZBlj n{?tDrI{PoL__y}|@AJUkC=pK+bv0D{11)4fDoNIgn}q%!#n%nB literal 0 HcmV?d00001 diff --git a/data_utils/deepspeech_features/README.md b/data_utils/deepspeech_features/README.md new file mode 100644 index 0000000..c9f6c6b --- /dev/null +++ b/data_utils/deepspeech_features/README.md @@ -0,0 +1,20 @@ +# Routines for DeepSpeech features processing +Several routines for [DeepSpeech](https://github.com/mozilla/DeepSpeech) features processing, like speech features generation for [VOCA](https://github.com/TimoBolkart/voca) model. + +## Installation + +``` +pip3 install -r requirements.txt +``` + +## Usage + +Generate wav files: +``` +python3 extract_wav.py --in-video= +``` + +Generate files with DeepSpeech features: +``` +python3 extract_ds_features.py --input= +``` diff --git a/data_utils/deepspeech_features/deepspeech_features.py b/data_utils/deepspeech_features/deepspeech_features.py new file mode 100644 index 0000000..2efc586 --- /dev/null +++ b/data_utils/deepspeech_features/deepspeech_features.py @@ -0,0 +1,275 @@ +""" + DeepSpeech features processing routines. + NB: Based on VOCA code. See the corresponding license restrictions. +""" + +__all__ = ['conv_audios_to_deepspeech'] + +import numpy as np +import warnings +import resampy +from scipy.io import wavfile +from python_speech_features import mfcc +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() + +def conv_audios_to_deepspeech(audios, + out_files, + num_frames_info, + deepspeech_pb_path, + audio_window_size=1, + audio_window_stride=1): + """ + Convert list of audio files into files with DeepSpeech features. + + Parameters + ---------- + audios : list of str or list of None + Paths to input audio files. + out_files : list of str + Paths to output files with DeepSpeech features. + num_frames_info : list of int + List of numbers of frames. + deepspeech_pb_path : str + Path to DeepSpeech 0.1.0 frozen model. + audio_window_size : int, default 16 + Audio window size. + audio_window_stride : int, default 1 + Audio window stride. 
+ """ + # deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm" + graph, logits_ph, input_node_ph, input_lengths_ph = prepare_deepspeech_net( + deepspeech_pb_path) + + with tf.compat.v1.Session(graph=graph) as sess: + for audio_file_path, out_file_path, num_frames in zip(audios, out_files, num_frames_info): + print(audio_file_path) + print(out_file_path) + audio_sample_rate, audio = wavfile.read(audio_file_path) + if audio.ndim != 1: + warnings.warn( + "Audio has multiple channels, the first channel is used") + audio = audio[:, 0] + ds_features = pure_conv_audio_to_deepspeech( + audio=audio, + audio_sample_rate=audio_sample_rate, + audio_window_size=audio_window_size, + audio_window_stride=audio_window_stride, + num_frames=num_frames, + net_fn=lambda x: sess.run( + logits_ph, + feed_dict={ + input_node_ph: x[np.newaxis, ...], + input_lengths_ph: [x.shape[0]]})) + + net_output = ds_features.reshape(-1, 29) + win_size = 16 + zero_pad = np.zeros((int(win_size / 2), net_output.shape[1])) + net_output = np.concatenate( + (zero_pad, net_output, zero_pad), axis=0) + windows = [] + for window_index in range(0, net_output.shape[0] - win_size, 2): + windows.append( + net_output[window_index:window_index + win_size]) + print(np.array(windows).shape) + np.save(out_file_path, np.array(windows)) + + +def prepare_deepspeech_net(deepspeech_pb_path): + """ + Load and prepare DeepSpeech network. + + Parameters + ---------- + deepspeech_pb_path : str + Path to DeepSpeech 0.1.0 frozen model. + + Returns + ------- + graph : obj + ThensorFlow graph. + logits_ph : obj + ThensorFlow placeholder for `logits`. + input_node_ph : obj + ThensorFlow placeholder for `input_node`. + input_lengths_ph : obj + ThensorFlow placeholder for `input_lengths`. + """ + # Load graph and place_holders: + with tf.io.gfile.GFile(deepspeech_pb_path, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + + graph = tf.compat.v1.get_default_graph() + tf.import_graph_def(graph_def, name="deepspeech") + logits_ph = graph.get_tensor_by_name("deepspeech/logits:0") + input_node_ph = graph.get_tensor_by_name("deepspeech/input_node:0") + input_lengths_ph = graph.get_tensor_by_name("deepspeech/input_lengths:0") + + return graph, logits_ph, input_node_ph, input_lengths_ph + + +def pure_conv_audio_to_deepspeech(audio, + audio_sample_rate, + audio_window_size, + audio_window_stride, + num_frames, + net_fn): + """ + Core routine for converting audion into DeepSpeech features. + + Parameters + ---------- + audio : np.array + Audio data. + audio_sample_rate : int + Audio sample rate. + audio_window_size : int + Audio window size. + audio_window_stride : int + Audio window stride. + num_frames : int or None + Numbers of frames. + net_fn : func + Function for DeepSpeech model call. + + Returns + ------- + np.array + DeepSpeech features. 
+ """ + target_sample_rate = 16000 + if audio_sample_rate != target_sample_rate: + resampled_audio = resampy.resample( + x=audio.astype(np.float), + sr_orig=audio_sample_rate, + sr_new=target_sample_rate) + else: + resampled_audio = audio.astype(np.float) + input_vector = conv_audio_to_deepspeech_input_vector( + audio=resampled_audio.astype(np.int16), + sample_rate=target_sample_rate, + num_cepstrum=26, + num_context=9) + + network_output = net_fn(input_vector) + # print(network_output.shape) + + deepspeech_fps = 50 + video_fps = 50 # Change this option if video fps is different + audio_len_s = float(audio.shape[0]) / audio_sample_rate + if num_frames is None: + num_frames = int(round(audio_len_s * video_fps)) + else: + video_fps = num_frames / audio_len_s + network_output = interpolate_features( + features=network_output[:, 0], + input_rate=deepspeech_fps, + output_rate=video_fps, + output_len=num_frames) + + # Make windows: + zero_pad = np.zeros((int(audio_window_size / 2), network_output.shape[1])) + network_output = np.concatenate( + (zero_pad, network_output, zero_pad), axis=0) + windows = [] + for window_index in range(0, network_output.shape[0] - audio_window_size, audio_window_stride): + windows.append( + network_output[window_index:window_index + audio_window_size]) + + return np.array(windows) + + +def conv_audio_to_deepspeech_input_vector(audio, + sample_rate, + num_cepstrum, + num_context): + """ + Convert audio raw data into DeepSpeech input vector. + + Parameters + ---------- + audio : np.array + Audio data. + audio_sample_rate : int + Audio sample rate. + num_cepstrum : int + Number of cepstrum. + num_context : int + Number of context. + + Returns + ------- + np.array + DeepSpeech input vector. + """ + # Get mfcc coefficients: + features = mfcc( + signal=audio, + samplerate=sample_rate, + numcep=num_cepstrum) + + # We only keep every second feature (BiRNN stride = 2): + features = features[::2] + + # One stride per time step in the input: + num_strides = len(features) + + # Add empty initial and final contexts: + empty_context = np.zeros((num_context, num_cepstrum), dtype=features.dtype) + features = np.concatenate((empty_context, features, empty_context)) + + # Create a view into the array with overlapping strides of size + # numcontext (past) + 1 (present) + numcontext (future): + window_size = 2 * num_context + 1 + train_inputs = np.lib.stride_tricks.as_strided( + features, + shape=(num_strides, window_size, num_cepstrum), + strides=(features.strides[0], + features.strides[0], features.strides[1]), + writeable=False) + + # Flatten the second and third dimensions: + train_inputs = np.reshape(train_inputs, [num_strides, -1]) + + train_inputs = np.copy(train_inputs) + train_inputs = (train_inputs - np.mean(train_inputs)) / \ + np.std(train_inputs) + + return train_inputs + + +def interpolate_features(features, + input_rate, + output_rate, + output_len): + """ + Interpolate DeepSpeech features. + + Parameters + ---------- + features : np.array + DeepSpeech features. + input_rate : int + input rate (FPS). + output_rate : int + Output rate (FPS). + output_len : int + Output data length. + + Returns + ------- + np.array + Interpolated data. 
+ """ + input_len = features.shape[0] + num_features = features.shape[1] + input_timestamps = np.arange(input_len) / float(input_rate) + output_timestamps = np.arange(output_len) / float(output_rate) + output_features = np.zeros((output_len, num_features)) + for feature_idx in range(num_features): + output_features[:, feature_idx] = np.interp( + x=output_timestamps, + xp=input_timestamps, + fp=features[:, feature_idx]) + return output_features diff --git a/data_utils/deepspeech_features/deepspeech_store.py b/data_utils/deepspeech_features/deepspeech_store.py new file mode 100644 index 0000000..4c2f603 --- /dev/null +++ b/data_utils/deepspeech_features/deepspeech_store.py @@ -0,0 +1,172 @@ +""" + Routines for loading DeepSpeech model. +""" + +__all__ = ['get_deepspeech_model_file'] + +import os +import zipfile +import logging +import hashlib + + +deepspeech_features_repo_url = 'https://github.com/osmr/deepspeech_features' + + +def get_deepspeech_model_file(local_model_store_dir_path=os.path.join("~", ".tensorflow", "models")): + """ + Return location for the pretrained on local file system. This function will download from online model zoo when + model cannot be found or has mismatch. The root directory will be created if it doesn't exist. + + Parameters + ---------- + local_model_store_dir_path : str, default $TENSORFLOW_HOME/models + Location for keeping the model parameters. + + Returns + ------- + file_path + Path to the requested pretrained model file. + """ + sha1_hash = "b90017e816572ddce84f5843f1fa21e6a377975e" + file_name = "deepspeech-0_1_0-b90017e8.pb" + local_model_store_dir_path = os.path.expanduser(local_model_store_dir_path) + file_path = os.path.join(local_model_store_dir_path, file_name) + if os.path.exists(file_path): + if _check_sha1(file_path, sha1_hash): + return file_path + else: + logging.warning("Mismatch in the content of model file detected. Downloading again.") + else: + logging.info("Model file not found. Downloading to {}.".format(file_path)) + + if not os.path.exists(local_model_store_dir_path): + os.makedirs(local_model_store_dir_path) + + zip_file_path = file_path + ".zip" + _download( + url="{repo_url}/releases/download/{repo_release_tag}/{file_name}.zip".format( + repo_url=deepspeech_features_repo_url, + repo_release_tag="v0.0.1", + file_name=file_name), + path=zip_file_path, + overwrite=True) + with zipfile.ZipFile(zip_file_path) as zf: + zf.extractall(local_model_store_dir_path) + os.remove(zip_file_path) + + if _check_sha1(file_path, sha1_hash): + return file_path + else: + raise ValueError("Downloaded file has different hash. Please try again.") + + +def _download(url, path=None, overwrite=False, sha1_hash=None, retries=5, verify_ssl=True): + """ + Download an given URL + + Parameters + ---------- + url : str + URL to download + path : str, optional + Destination path to store downloaded file. By default stores to the + current directory with same name as in url. + overwrite : bool, optional + Whether to overwrite destination file if already exists. + sha1_hash : str, optional + Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified + but doesn't match. + retries : integer, default 5 + The number of times to attempt the download in case of failure or non 200 return codes + verify_ssl : bool, default True + Verify SSL certificates. + + Returns + ------- + str + The file path of the downloaded file. 
+ """ + import warnings + try: + import requests + except ImportError: + class requests_failed_to_import(object): + pass + requests = requests_failed_to_import + + if path is None: + fname = url.split("/")[-1] + # Empty filenames are invalid + assert fname, "Can't construct file-name from this URL. Please set the `path` option manually." + else: + path = os.path.expanduser(path) + if os.path.isdir(path): + fname = os.path.join(path, url.split("/")[-1]) + else: + fname = path + assert retries >= 0, "Number of retries should be at least 0" + + if not verify_ssl: + warnings.warn( + "Unverified HTTPS request is being made (verify_ssl=False). " + "Adding certificate verification is strongly advised.") + + if overwrite or not os.path.exists(fname) or (sha1_hash and not _check_sha1(fname, sha1_hash)): + dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) + if not os.path.exists(dirname): + os.makedirs(dirname) + while retries + 1 > 0: + # Disable pyling too broad Exception + # pylint: disable=W0703 + try: + print("Downloading {} from {}...".format(fname, url)) + r = requests.get(url, stream=True, verify=verify_ssl) + if r.status_code != 200: + raise RuntimeError("Failed downloading url {}".format(url)) + with open(fname, "wb") as f: + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + if sha1_hash and not _check_sha1(fname, sha1_hash): + raise UserWarning("File {} is downloaded but the content hash does not match." + " The repo may be outdated or download may be incomplete. " + "If the `repo_url` is overridden, consider switching to " + "the default repo.".format(fname)) + break + except Exception as e: + retries -= 1 + if retries <= 0: + raise e + else: + print("download failed, retrying, {} attempt{} left" + .format(retries, "s" if retries > 1 else "")) + + return fname + + +def _check_sha1(filename, sha1_hash): + """ + Check whether the sha1 hash of the file content matches the expected hash. + + Parameters + ---------- + filename : str + Path to the file. + sha1_hash : str + Expected sha1 hash in hexadecimal digits. + + Returns + ------- + bool + Whether the file content matches the expected hash. + """ + sha1 = hashlib.sha1() + with open(filename, "rb") as f: + while True: + data = f.read(1048576) + if not data: + break + sha1.update(data) + + return sha1.hexdigest() == sha1_hash diff --git a/data_utils/deepspeech_features/extract_ds_features.py b/data_utils/deepspeech_features/extract_ds_features.py new file mode 100644 index 0000000..db525d1 --- /dev/null +++ b/data_utils/deepspeech_features/extract_ds_features.py @@ -0,0 +1,132 @@ +""" + Script for extracting DeepSpeech features from audio file. +""" + +import os +import argparse +import numpy as np +import pandas as pd +from deepspeech_store import get_deepspeech_model_file +from deepspeech_features import conv_audios_to_deepspeech + + +def parse_args(): + """ + Create python script parameters. + Returns + ------- + ArgumentParser + Resulted args. 
+ """ + parser = argparse.ArgumentParser( + description="Extract DeepSpeech features from audio file", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--input", + type=str, + required=True, + help="path to input audio file or directory") + parser.add_argument( + "--output", + type=str, + help="path to output file with DeepSpeech features") + parser.add_argument( + "--deepspeech", + type=str, + help="path to DeepSpeech 0.1.0 frozen model") + parser.add_argument( + "--metainfo", + type=str, + help="path to file with meta-information") + + args = parser.parse_args() + return args + + +def extract_features(in_audios, + out_files, + deepspeech_pb_path, + metainfo_file_path=None): + """ + Real extract audio from video file. + Parameters + ---------- + in_audios : list of str + Paths to input audio files. + out_files : list of str + Paths to output files with DeepSpeech features. + deepspeech_pb_path : str + Path to DeepSpeech 0.1.0 frozen model. + metainfo_file_path : str, default None + Path to file with meta-information. + """ + #deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm" + if metainfo_file_path is None: + num_frames_info = [None] * len(in_audios) + else: + train_df = pd.read_csv( + metainfo_file_path, + sep="\t", + index_col=False, + dtype={"Id": np.int, "File": np.unicode, "Count": np.int}) + num_frames_info = train_df["Count"].values + assert (len(num_frames_info) == len(in_audios)) + + for i, in_audio in enumerate(in_audios): + if not out_files[i]: + file_stem, _ = os.path.splitext(in_audio) + out_files[i] = file_stem + ".npy" + #print(out_files[i]) + conv_audios_to_deepspeech( + audios=in_audios, + out_files=out_files, + num_frames_info=num_frames_info, + deepspeech_pb_path=deepspeech_pb_path) + + +def main(): + """ + Main body of script. 
+ """ + args = parse_args() + in_audio = os.path.expanduser(args.input) + if not os.path.exists(in_audio): + raise Exception("Input file/directory doesn't exist: {}".format(in_audio)) + deepspeech_pb_path = args.deepspeech + #add + deepspeech_pb_path = True + args.deepspeech = '~/.tensorflow/models/deepspeech-0_1_0-b90017e8.pb' + #deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm" + if deepspeech_pb_path is None: + deepspeech_pb_path = "" + if deepspeech_pb_path: + deepspeech_pb_path = os.path.expanduser(args.deepspeech) + if not os.path.exists(deepspeech_pb_path): + deepspeech_pb_path = get_deepspeech_model_file() + if os.path.isfile(in_audio): + extract_features( + in_audios=[in_audio], + out_files=[args.output], + deepspeech_pb_path=deepspeech_pb_path, + metainfo_file_path=args.metainfo) + else: + audio_file_paths = [] + for file_name in os.listdir(in_audio): + if not os.path.isfile(os.path.join(in_audio, file_name)): + continue + _, file_ext = os.path.splitext(file_name) + if file_ext.lower() == ".wav": + audio_file_path = os.path.join(in_audio, file_name) + audio_file_paths.append(audio_file_path) + audio_file_paths = sorted(audio_file_paths) + out_file_paths = [""] * len(audio_file_paths) + extract_features( + in_audios=audio_file_paths, + out_files=out_file_paths, + deepspeech_pb_path=deepspeech_pb_path, + metainfo_file_path=args.metainfo) + + +if __name__ == "__main__": + main() + diff --git a/data_utils/deepspeech_features/extract_wav.py b/data_utils/deepspeech_features/extract_wav.py new file mode 100644 index 0000000..5f39e8b --- /dev/null +++ b/data_utils/deepspeech_features/extract_wav.py @@ -0,0 +1,87 @@ +""" + Script for extracting audio (16-bit, mono, 22000 Hz) from video file. +""" + +import os +import argparse +import subprocess + + +def parse_args(): + """ + Create python script parameters. + + Returns + ------- + ArgumentParser + Resulted args. + """ + parser = argparse.ArgumentParser( + description="Extract audio from video file", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--in-video", + type=str, + required=True, + help="path to input video file or directory") + parser.add_argument( + "--out-audio", + type=str, + help="path to output audio file") + + args = parser.parse_args() + return args + + +def extract_audio(in_video, + out_audio): + """ + Real extract audio from video file. + + Parameters + ---------- + in_video : str + Path to input video file. + out_audio : str + Path to output audio file. + """ + if not out_audio: + file_stem, _ = os.path.splitext(in_video) + out_audio = file_stem + ".wav" + # command1 = "ffmpeg -i {in_video} -vn -acodec copy {aac_audio}" + # command2 = "ffmpeg -i {aac_audio} -vn -acodec pcm_s16le -ac 1 -ar 22000 {out_audio}" + # command = "ffmpeg -i {in_video} -vn -acodec pcm_s16le -ac 1 -ar 22000 {out_audio}" + command = "ffmpeg -i {in_video} -vn -acodec pcm_s16le -ac 1 -ar 16000 {out_audio}" + subprocess.call([command.format(in_video=in_video, out_audio=out_audio)], shell=True) + + +def main(): + """ + Main body of script. 
+ """ + args = parse_args() + in_video = os.path.expanduser(args.in_video) + if not os.path.exists(in_video): + raise Exception("Input file/directory doesn't exist: {}".format(in_video)) + if os.path.isfile(in_video): + extract_audio( + in_video=in_video, + out_audio=args.out_audio) + else: + video_file_paths = [] + for file_name in os.listdir(in_video): + if not os.path.isfile(os.path.join(in_video, file_name)): + continue + _, file_ext = os.path.splitext(file_name) + if file_ext.lower() in (".mp4", ".mkv", ".avi"): + video_file_path = os.path.join(in_video, file_name) + video_file_paths.append(video_file_path) + video_file_paths = sorted(video_file_paths) + for video_file_path in video_file_paths: + extract_audio( + in_video=video_file_path, + out_audio="") + + +if __name__ == "__main__": + main() diff --git a/data_utils/deepspeech_features/fea_win.py b/data_utils/deepspeech_features/fea_win.py new file mode 100644 index 0000000..4f9c666 --- /dev/null +++ b/data_utils/deepspeech_features/fea_win.py @@ -0,0 +1,11 @@ +import numpy as np + +net_output = np.load('french.ds.npy').reshape(-1, 29) +win_size = 16 +zero_pad = np.zeros((int(win_size / 2), net_output.shape[1])) +net_output = np.concatenate((zero_pad, net_output, zero_pad), axis=0) +windows = [] +for window_index in range(0, net_output.shape[0] - win_size, 2): + windows.append(net_output[window_index:window_index + win_size]) +print(np.array(windows).shape) +np.save('aud_french.npy', np.array(windows)) diff --git a/data_utils/face_parsing/logger.py b/data_utils/face_parsing/logger.py new file mode 100644 index 0000000..ad8452b --- /dev/null +++ b/data_utils/face_parsing/logger.py @@ -0,0 +1,23 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import os.path as osp +import time +import sys +import logging + +import torch.distributed as dist + + +def setup_logger(logpth): + logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S')) + logfile = osp.join(logpth, logfile) + FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s' + log_level = logging.INFO + if dist.is_initialized() and not dist.get_rank()==0: + log_level = logging.ERROR + logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) + logging.root.addHandler(logging.StreamHandler()) + + diff --git a/data_utils/face_parsing/model.py b/data_utils/face_parsing/model.py new file mode 100644 index 0000000..43181f0 --- /dev/null +++ b/data_utils/face_parsing/model.py @@ -0,0 +1,285 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +from resnet import Resnet18 +# from modules.bn import InPlaceABNSync as BatchNorm2d + + +class ConvBNReLU(nn.Module): + def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs): + super(ConvBNReLU, self).__init__() + self.conv = nn.Conv2d(in_chan, + out_chan, + kernel_size = ks, + stride = stride, + padding = padding, + bias = False) + self.bn = nn.BatchNorm2d(out_chan) + self.init_weight() + + def forward(self, x): + x = self.conv(x) + x = F.relu(self.bn(x)) + return x + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + +class BiSeNetOutput(nn.Module): + def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs): + super(BiSeNetOutput, self).__init__() + self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) + self.conv_out = nn.Conv2d(mid_chan, 
n_classes, kernel_size=1, bias=False) + self.init_weight() + + def forward(self, x): + x = self.conv(x) + x = self.conv_out(x) + return x + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class AttentionRefinementModule(nn.Module): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(AttentionRefinementModule, self).__init__() + self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) + self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False) + self.bn_atten = nn.BatchNorm2d(out_chan) + self.sigmoid_atten = nn.Sigmoid() + self.init_weight() + + def forward(self, x): + feat = self.conv(x) + atten = F.avg_pool2d(feat, feat.size()[2:]) + atten = self.conv_atten(atten) + atten = self.bn_atten(atten) + atten = self.sigmoid_atten(atten) + out = torch.mul(feat, atten) + return out + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + +class ContextPath(nn.Module): + def __init__(self, *args, **kwargs): + super(ContextPath, self).__init__() + self.resnet = Resnet18() + self.arm16 = AttentionRefinementModule(256, 128) + self.arm32 = AttentionRefinementModule(512, 128) + self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) + + self.init_weight() + + def forward(self, x): + H0, W0 = x.size()[2:] + feat8, feat16, feat32 = self.resnet(x) + H8, W8 = feat8.size()[2:] + H16, W16 = feat16.size()[2:] + H32, W32 = feat32.size()[2:] + + avg = F.avg_pool2d(feat32, feat32.size()[2:]) + avg = self.conv_avg(avg) + avg_up = F.interpolate(avg, (H32, W32), mode='nearest') + + feat32_arm = self.arm32(feat32) + feat32_sum = feat32_arm + avg_up + feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest') + feat32_up = self.conv_head32(feat32_up) + + feat16_arm = self.arm16(feat16) + feat16_sum = feat16_arm + feat32_up + feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest') + feat16_up = self.conv_head16(feat16_up) + + return feat8, feat16_up, feat32_up # x8, x8, x16 + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, (nn.Linear, nn.Conv2d)): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +### This is not used, since I replace this with the resnet feature with the same size +class SpatialPath(nn.Module): + def __init__(self, *args, **kwargs): + super(SpatialPath, self).__init__() + self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, 
padding=3) + self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) + self.init_weight() + + def forward(self, x): + feat = self.conv1(x) + feat = self.conv2(feat) + feat = self.conv3(feat) + feat = self.conv_out(feat) + return feat + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class FeatureFusionModule(nn.Module): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(FeatureFusionModule, self).__init__() + self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0) + self.conv1 = nn.Conv2d(out_chan, + out_chan//4, + kernel_size = 1, + stride = 1, + padding = 0, + bias = False) + self.conv2 = nn.Conv2d(out_chan//4, + out_chan, + kernel_size = 1, + stride = 1, + padding = 0, + bias = False) + self.relu = nn.ReLU(inplace=True) + self.sigmoid = nn.Sigmoid() + self.init_weight() + + def forward(self, fsp, fcp): + fcat = torch.cat([fsp, fcp], dim=1) + feat = self.convblk(fcat) + atten = F.avg_pool2d(feat, feat.size()[2:]) + atten = self.conv1(atten) + atten = self.relu(atten) + atten = self.conv2(atten) + atten = self.sigmoid(atten) + feat_atten = torch.mul(feat, atten) + feat_out = feat_atten + feat + return feat_out + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +class BiSeNet(nn.Module): + def __init__(self, n_classes, *args, **kwargs): + super(BiSeNet, self).__init__() + self.cp = ContextPath() + ## here self.sp is deleted + self.ffm = FeatureFusionModule(256, 256) + self.conv_out = BiSeNetOutput(256, 256, n_classes) + self.conv_out16 = BiSeNetOutput(128, 64, n_classes) + self.conv_out32 = BiSeNetOutput(128, 64, n_classes) + self.init_weight() + + def forward(self, x): + H, W = x.size()[2:] + feat_res8, feat_cp8, feat_cp16 = self.cp(x) # here return res3b1 feature + feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature + feat_fuse = self.ffm(feat_sp, feat_cp8) + + feat_out = self.conv_out(feat_fuse) + feat_out16 = self.conv_out16(feat_cp8) + feat_out32 = self.conv_out32(feat_cp16) + + feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True) + feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True) + feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True) + + # return feat_out, feat_out16, feat_out32 + return feat_out + + def init_weight(self): + for ly in self.children(): + if isinstance(ly, 
nn.Conv2d): + nn.init.kaiming_normal_(ly.weight, a=1) + if not ly.bias is None: nn.init.constant_(ly.bias, 0) + + def get_params(self): + wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] + for name, child in self.named_children(): + child_wd_params, child_nowd_params = child.get_params() + if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput): + lr_mul_wd_params += child_wd_params + lr_mul_nowd_params += child_nowd_params + else: + wd_params += child_wd_params + nowd_params += child_nowd_params + return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params + + +if __name__ == "__main__": + net = BiSeNet(19) + net.cuda() + net.eval() + in_ten = torch.randn(16, 3, 640, 480).cuda() + out, out16, out32 = net(in_ten) + print(out.shape) + + net.get_params() diff --git a/data_utils/face_parsing/resnet.py b/data_utils/face_parsing/resnet.py new file mode 100644 index 0000000..64969da --- /dev/null +++ b/data_utils/face_parsing/resnet.py @@ -0,0 +1,109 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as modelzoo + +# from modules.bn import InPlaceABNSync as BatchNorm2d + +resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + def __init__(self, in_chan, out_chan, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_chan, out_chan, stride) + self.bn1 = nn.BatchNorm2d(out_chan) + self.conv2 = conv3x3(out_chan, out_chan) + self.bn2 = nn.BatchNorm2d(out_chan) + self.relu = nn.ReLU(inplace=True) + self.downsample = None + if in_chan != out_chan or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d(in_chan, out_chan, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_chan), + ) + + def forward(self, x): + residual = self.conv1(x) + residual = F.relu(self.bn1(residual)) + residual = self.conv2(residual) + residual = self.bn2(residual) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out = shortcut + residual + out = self.relu(out) + return out + + +def create_layer_basic(in_chan, out_chan, bnum, stride=1): + layers = [BasicBlock(in_chan, out_chan, stride=stride)] + for i in range(bnum-1): + layers.append(BasicBlock(out_chan, out_chan, stride=1)) + return nn.Sequential(*layers) + + +class Resnet18(nn.Module): + def __init__(self): + super(Resnet18, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) + self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) + self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) + self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) + self.init_weight() + + def forward(self, x): + x = self.conv1(x) + x = F.relu(self.bn1(x)) + x = self.maxpool(x) + + x = self.layer1(x) + feat8 = self.layer2(x) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat8, feat16, feat32 + + def init_weight(self): + state_dict = modelzoo.load_url(resnet18_url) + self_state_dict = self.state_dict() + for k, v in state_dict.items(): + if 'fc' in k: continue + 
self_state_dict.update({k: v}) + self.load_state_dict(self_state_dict) + + def get_params(self): + wd_params, nowd_params = [], [] + for name, module in self.named_modules(): + if isinstance(module, (nn.Linear, nn.Conv2d)): + wd_params.append(module.weight) + if not module.bias is None: + nowd_params.append(module.bias) + elif isinstance(module, nn.BatchNorm2d): + nowd_params += list(module.parameters()) + return wd_params, nowd_params + + +if __name__ == "__main__": + net = Resnet18() + x = torch.randn(16, 3, 224, 224) + out = net(x) + print(out[0].size()) + print(out[1].size()) + print(out[2].size()) + net.get_params() diff --git a/data_utils/face_parsing/test.py b/data_utils/face_parsing/test.py new file mode 100644 index 0000000..ede8481 --- /dev/null +++ b/data_utils/face_parsing/test.py @@ -0,0 +1,98 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- +import numpy as np +from model import BiSeNet + +import torch + +import os +import os.path as osp + +from PIL import Image +import torchvision.transforms as transforms +import cv2 +from pathlib import Path +import configargparse +import tqdm + +# import ttach as tta + +def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg', + img_size=(512, 512)): + im = np.array(im) + vis_im = im.copy().astype(np.uint8) + vis_parsing_anno = parsing_anno.copy().astype(np.uint8) + vis_parsing_anno = cv2.resize( + vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) + vis_parsing_anno_color = np.zeros( + (vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + np.array([255, 255, 255]) # + 255 + + num_of_class = np.max(vis_parsing_anno) + # print(num_of_class) + for pi in range(1, 14): + index = np.where(vis_parsing_anno == pi) + vis_parsing_anno_color[index[0], index[1], :] = np.array([255, 0, 0]) + + for pi in range(14, 16): + index = np.where(vis_parsing_anno == pi) + vis_parsing_anno_color[index[0], index[1], :] = np.array([0, 255, 0]) + for pi in range(16, 17): + index = np.where(vis_parsing_anno == pi) + vis_parsing_anno_color[index[0], index[1], :] = np.array([0, 0, 255]) + for pi in range(17, num_of_class+1): + index = np.where(vis_parsing_anno == pi) + vis_parsing_anno_color[index[0], index[1], :] = np.array([255, 0, 0]) + + vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8) + index = np.where(vis_parsing_anno == num_of_class-1) + vis_im = cv2.resize(vis_parsing_anno_color, img_size, + interpolation=cv2.INTER_NEAREST) + if save_im: + cv2.imwrite(save_path, vis_im) + + +def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'): + + Path(respth).mkdir(parents=True, exist_ok=True) + + print(f'[INFO] loading model...') + n_classes = 19 + net = BiSeNet(n_classes=n_classes) + net.cuda() + net.load_state_dict(torch.load(cp)) + net.eval() + + to_tensor = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + + image_paths = os.listdir(dspth) + + with torch.no_grad(): + for image_path in tqdm.tqdm(image_paths): + if image_path.endswith('.jpg') or image_path.endswith('.png'): + img = Image.open(osp.join(dspth, image_path)) + ori_size = img.size + image = img.resize((512, 512), Image.BILINEAR) + image = image.convert("RGB") + img = to_tensor(image) + + # test-time augmentation. 
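+ # Note: test-time augmentation (the commented-out `ttach` import above) is left
+ # disabled, so each image is predicted once from a single 512x512 resize and
+ # `outputs.mean(0)` below merely collapses the batch dimension of that one
+ # prediction before the per-pixel argmax over the 19 classes.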
+ inputs = torch.unsqueeze(img, 0) # [1, 3, 512, 512] + outputs = net(inputs.cuda()) + parsing = outputs.mean(0).cpu().numpy().argmax(0) + + image_path = int(image_path[:-4]) + image_path = str(image_path) + '.png' + + vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path), img_size=ori_size) + + +if __name__ == "__main__": + parser = configargparse.ArgumentParser() + parser.add_argument('--respath', type=str, default='./result/', help='result path for label') + parser.add_argument('--imgpath', type=str, default='./imgs/', help='path for input images') + parser.add_argument('--modelpath', type=str, default='data_utils/face_parsing/79999_iter.pth') + args = parser.parse_args() + evaluate(respth=args.respath, dspth=args.imgpath, cp=args.modelpath) diff --git a/data_utils/face_tracking/__init__.py b/data_utils/face_tracking/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/data_utils/face_tracking/convert_BFM.py b/data_utils/face_tracking/convert_BFM.py new file mode 100644 index 0000000..5c64af6 --- /dev/null +++ b/data_utils/face_tracking/convert_BFM.py @@ -0,0 +1,39 @@ +import numpy as np +from scipy.io import loadmat + +original_BFM = loadmat("3DMM/01_MorphableModel.mat") +sub_inds = np.load("3DMM/topology_info.npy", allow_pickle=True).item()["sub_inds"] + +shapePC = original_BFM["shapePC"] +shapeEV = original_BFM["shapeEV"] +shapeMU = original_BFM["shapeMU"] +texPC = original_BFM["texPC"] +texEV = original_BFM["texEV"] +texMU = original_BFM["texMU"] + +b_shape = shapePC.reshape(-1, 199).transpose(1, 0).reshape(199, -1, 3) +mu_shape = shapeMU.reshape(-1, 3) + +b_tex = texPC.reshape(-1, 199).transpose(1, 0).reshape(199, -1, 3) +mu_tex = texMU.reshape(-1, 3) + +b_shape = b_shape[:, sub_inds, :].reshape(199, -1) +mu_shape = mu_shape[sub_inds, :].reshape(-1) +b_tex = b_tex[:, sub_inds, :].reshape(199, -1) +mu_tex = mu_tex[sub_inds, :].reshape(-1) + +exp_info = np.load("3DMM/exp_info.npy", allow_pickle=True).item() +np.save( + "3DMM/3DMM_info.npy", + { + "mu_shape": mu_shape, + "b_shape": b_shape, + "sig_shape": shapeEV.reshape(-1), + "mu_exp": exp_info["mu_exp"], + "b_exp": exp_info["base_exp"], + "sig_exp": exp_info["sig_exp"], + "mu_tex": mu_tex, + "b_tex": b_tex, + "sig_tex": texEV.reshape(-1), + }, +) diff --git a/data_utils/face_tracking/data_loader.py b/data_utils/face_tracking/data_loader.py new file mode 100644 index 0000000..ba89904 --- /dev/null +++ b/data_utils/face_tracking/data_loader.py @@ -0,0 +1,16 @@ +import os +import torch +import numpy as np + + +def load_dir(path, start, end): + lmss = [] + imgs_paths = [] + for i in range(start, end): + if os.path.isfile(os.path.join(path, str(i) + ".lms")): + lms = np.loadtxt(os.path.join(path, str(i) + ".lms"), dtype=np.float32) + lmss.append(lms) + imgs_paths.append(os.path.join(path, str(i) + ".jpg")) + lmss = np.stack(lmss) + lmss = torch.as_tensor(lmss).cuda() + return lmss, imgs_paths diff --git a/data_utils/face_tracking/face_tracker.py b/data_utils/face_tracking/face_tracker.py new file mode 100644 index 0000000..438d112 --- /dev/null +++ b/data_utils/face_tracking/face_tracker.py @@ -0,0 +1,390 @@ +import os +import sys +import cv2 +import argparse +from pathlib import Path +import torch +import numpy as np +from data_loader import load_dir +from facemodel import Face_3DMM +from util import * +from render_3dmm import Render_3DMM + + +# torch.autograd.set_detect_anomaly(True) + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +def 
set_requires_grad(tensor_list): + for tensor in tensor_list: + tensor.requires_grad = True + + +parser = argparse.ArgumentParser() +parser.add_argument( + "--path", type=str, default="obama/ori_imgs", help="idname of target person" +) +parser.add_argument("--img_h", type=int, default=512, help="image height") +parser.add_argument("--img_w", type=int, default=512, help="image width") +parser.add_argument("--frame_num", type=int, default=11000, help="image number") +args = parser.parse_args() + +start_id = 0 +end_id = args.frame_num + +lms, img_paths = load_dir(args.path, start_id, end_id) +num_frames = lms.shape[0] +h, w = args.img_h, args.img_w +cxy = torch.tensor((w / 2.0, h / 2.0), dtype=torch.float).cuda() +id_dim, exp_dim, tex_dim, point_num = 100, 79, 100, 34650 +model_3dmm = Face_3DMM( + os.path.join(dir_path, "3DMM"), id_dim, exp_dim, tex_dim, point_num +) + +# only use one image per 40 to do fit the focal length +sel_ids = np.arange(0, num_frames, 40) +sel_num = sel_ids.shape[0] +arg_focal = 1600 +arg_landis = 1e5 + +print(f'[INFO] fitting focal length...') + +# fit the focal length +for focal in range(600, 1500, 100): + id_para = lms.new_zeros((1, id_dim), requires_grad=True) + exp_para = lms.new_zeros((sel_num, exp_dim), requires_grad=True) + euler_angle = lms.new_zeros((sel_num, 3), requires_grad=True) + trans = lms.new_zeros((sel_num, 3), requires_grad=True) + trans.data[:, 2] -= 7 + focal_length = lms.new_zeros(1, requires_grad=False) + focal_length.data += focal + set_requires_grad([id_para, exp_para, euler_angle, trans]) + + optimizer_idexp = torch.optim.Adam([id_para, exp_para], lr=0.1) + optimizer_frame = torch.optim.Adam([euler_angle, trans], lr=0.1) + + for iter in range(2000): + id_para_batch = id_para.expand(sel_num, -1) + geometry = model_3dmm.get_3dlandmarks( + id_para_batch, exp_para, euler_angle, trans, focal_length, cxy + ) + proj_geo = forward_transform(geometry, euler_angle, trans, focal_length, cxy) + loss_lan = cal_lan_loss(proj_geo[:, :, :2], lms[sel_ids].detach()) + loss = loss_lan + optimizer_frame.zero_grad() + loss.backward() + optimizer_frame.step() + # if iter % 100 == 0: + # print(focal, 'pose', iter, loss.item()) + + for iter in range(2500): + id_para_batch = id_para.expand(sel_num, -1) + geometry = model_3dmm.get_3dlandmarks( + id_para_batch, exp_para, euler_angle, trans, focal_length, cxy + ) + proj_geo = forward_transform(geometry, euler_angle, trans, focal_length, cxy) + loss_lan = cal_lan_loss(proj_geo[:, :, :2], lms[sel_ids].detach()) + loss_regid = torch.mean(id_para * id_para) + loss_regexp = torch.mean(exp_para * exp_para) + loss = loss_lan + loss_regid * 0.5 + loss_regexp * 0.4 + optimizer_idexp.zero_grad() + optimizer_frame.zero_grad() + loss.backward() + optimizer_idexp.step() + optimizer_frame.step() + # if iter % 100 == 0: + # print(focal, 'poseidexp', iter, loss_lan.item(), loss_regid.item(), loss_regexp.item()) + + if iter % 1500 == 0 and iter >= 1500: + for param_group in optimizer_idexp.param_groups: + param_group["lr"] *= 0.2 + for param_group in optimizer_frame.param_groups: + param_group["lr"] *= 0.2 + + print(focal, loss_lan.item(), torch.mean(trans[:, 2]).item()) + + if loss_lan.item() < arg_landis: + arg_landis = loss_lan.item() + arg_focal = focal + +print("[INFO] find best focal:", arg_focal) + +print(f'[INFO] coarse fitting...') + +# for all frames, do a coarse fitting ??? 
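+# Coarse fitting stage, as read from the code below: one identity code is shared
+# across all frames, while expression, euler angles, translation and (later)
+# lighting are optimized per frame. `trans[:, 2] -= 7` places the face roughly
+# 7 units in front of the camera as an initial guess, and the focal length is
+# initialized to the best value found by the search above. The first loop fits
+# pose only against the 2D landmarks; the second additionally refines the shared
+# identity and per-frame expressions, with small L2 regularizers on both.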
+id_para = lms.new_zeros((1, id_dim), requires_grad=True) +exp_para = lms.new_zeros((num_frames, exp_dim), requires_grad=True) +tex_para = lms.new_zeros( + (1, tex_dim), requires_grad=True +) # not optimized in this block ??? +euler_angle = lms.new_zeros((num_frames, 3), requires_grad=True) +trans = lms.new_zeros((num_frames, 3), requires_grad=True) +light_para = lms.new_zeros((num_frames, 27), requires_grad=True) +trans.data[:, 2] -= 7 # ??? +focal_length = lms.new_zeros(1, requires_grad=True) +focal_length.data += arg_focal + +set_requires_grad([id_para, exp_para, tex_para, euler_angle, trans, light_para]) + +optimizer_idexp = torch.optim.Adam([id_para, exp_para], lr=0.1) +optimizer_frame = torch.optim.Adam([euler_angle, trans], lr=1) + +for iter in range(1500): + id_para_batch = id_para.expand(num_frames, -1) + geometry = model_3dmm.get_3dlandmarks( + id_para_batch, exp_para, euler_angle, trans, focal_length, cxy + ) + proj_geo = forward_transform(geometry, euler_angle, trans, focal_length, cxy) + loss_lan = cal_lan_loss(proj_geo[:, :, :2], lms.detach()) + loss = loss_lan + optimizer_frame.zero_grad() + loss.backward() + optimizer_frame.step() + if iter == 1000: + for param_group in optimizer_frame.param_groups: + param_group["lr"] = 0.1 + # if iter % 100 == 0: + # print('pose', iter, loss.item()) + +for param_group in optimizer_frame.param_groups: + param_group["lr"] = 0.1 + +for iter in range(2000): + id_para_batch = id_para.expand(num_frames, -1) + geometry = model_3dmm.get_3dlandmarks( + id_para_batch, exp_para, euler_angle, trans, focal_length, cxy + ) + proj_geo = forward_transform(geometry, euler_angle, trans, focal_length, cxy) + loss_lan = cal_lan_loss(proj_geo[:, :, :2], lms.detach()) + loss_regid = torch.mean(id_para * id_para) + loss_regexp = torch.mean(exp_para * exp_para) + loss = loss_lan + loss_regid * 0.5 + loss_regexp * 0.4 + optimizer_idexp.zero_grad() + optimizer_frame.zero_grad() + loss.backward() + optimizer_idexp.step() + optimizer_frame.step() + # if iter % 100 == 0: + # print('poseidexp', iter, loss_lan.item(), loss_regid.item(), loss_regexp.item()) + if iter % 1000 == 0 and iter >= 1000: + for param_group in optimizer_idexp.param_groups: + param_group["lr"] *= 0.2 + for param_group in optimizer_frame.param_groups: + param_group["lr"] *= 0.2 + +print(loss_lan.item(), torch.mean(trans[:, 2]).item()) + +print(f'[INFO] fitting light...') + +batch_size = 32 + +device_default = torch.device("cuda:0") +device_render = torch.device("cuda:0") +renderer = Render_3DMM(arg_focal, h, w, batch_size, device_render) + +sel_ids = np.arange(0, num_frames, int(num_frames / batch_size))[:batch_size] +imgs = [] +for sel_id in sel_ids: + imgs.append(cv2.imread(img_paths[sel_id])[:, :, ::-1]) +imgs = np.stack(imgs) +sel_imgs = torch.as_tensor(imgs).cuda() +sel_lms = lms[sel_ids] +sel_light = light_para.new_zeros((batch_size, 27), requires_grad=True) +set_requires_grad([sel_light]) + +optimizer_tl = torch.optim.Adam([tex_para, sel_light], lr=0.1) +optimizer_id_frame = torch.optim.Adam([euler_angle, trans, exp_para, id_para], lr=0.01) + +for iter in range(71): + sel_exp_para, sel_euler, sel_trans = ( + exp_para[sel_ids], + euler_angle[sel_ids], + trans[sel_ids], + ) + sel_id_para = id_para.expand(batch_size, -1) + geometry = model_3dmm.get_3dlandmarks( + sel_id_para, sel_exp_para, sel_euler, sel_trans, focal_length, cxy + ) + proj_geo = forward_transform(geometry, sel_euler, sel_trans, focal_length, cxy) + + loss_lan = cal_lan_loss(proj_geo[:, :, :2], sel_lms.detach()) + loss_regid = 
torch.mean(id_para * id_para) + loss_regexp = torch.mean(sel_exp_para * sel_exp_para) + + sel_tex_para = tex_para.expand(batch_size, -1) + sel_texture = model_3dmm.forward_tex(sel_tex_para) + geometry = model_3dmm.forward_geo(sel_id_para, sel_exp_para) + rott_geo = forward_rott(geometry, sel_euler, sel_trans) + render_imgs = renderer( + rott_geo.to(device_render), + sel_texture.to(device_render), + sel_light.to(device_render), + ) + render_imgs = render_imgs.to(device_default) + + mask = (render_imgs[:, :, :, 3]).detach() > 0.0 + render_proj = sel_imgs.clone() + render_proj[mask] = render_imgs[mask][..., :3].byte() + loss_col = cal_col_loss(render_imgs[:, :, :, :3], sel_imgs.float(), mask) + + if iter > 50: + loss = loss_col + loss_lan * 0.05 + loss_regid * 1.0 + loss_regexp * 0.8 + else: + loss = loss_col + loss_lan * 3 + loss_regid * 2.0 + loss_regexp * 1.0 + + optimizer_tl.zero_grad() + optimizer_id_frame.zero_grad() + loss.backward() + + optimizer_tl.step() + optimizer_id_frame.step() + + if iter % 50 == 0 and iter > 0: + for param_group in optimizer_id_frame.param_groups: + param_group["lr"] *= 0.2 + for param_group in optimizer_tl.param_groups: + param_group["lr"] *= 0.2 + # print(iter, loss_col.item(), loss_lan.item(), loss_regid.item(), loss_regexp.item()) + + +light_mean = torch.mean(sel_light, 0).unsqueeze(0).repeat(num_frames, 1) +light_para.data = light_mean + +exp_para = exp_para.detach() +euler_angle = euler_angle.detach() +trans = trans.detach() +light_para = light_para.detach() + +print(f'[INFO] fine frame-wise fitting...') + +for i in range(int((num_frames - 1) / batch_size + 1)): + + if (i + 1) * batch_size > num_frames: + start_n = num_frames - batch_size + sel_ids = np.arange(num_frames - batch_size, num_frames) + else: + start_n = i * batch_size + sel_ids = np.arange(i * batch_size, i * batch_size + batch_size) + + imgs = [] + for sel_id in sel_ids: + imgs.append(cv2.imread(img_paths[sel_id])[:, :, ::-1]) + imgs = np.stack(imgs) + sel_imgs = torch.as_tensor(imgs).cuda() + sel_lms = lms[sel_ids] + + sel_exp_para = exp_para.new_zeros((batch_size, exp_dim), requires_grad=True) + sel_exp_para.data = exp_para[sel_ids].clone() + sel_euler = euler_angle.new_zeros((batch_size, 3), requires_grad=True) + sel_euler.data = euler_angle[sel_ids].clone() + sel_trans = trans.new_zeros((batch_size, 3), requires_grad=True) + sel_trans.data = trans[sel_ids].clone() + sel_light = light_para.new_zeros((batch_size, 27), requires_grad=True) + sel_light.data = light_para[sel_ids].clone() + + set_requires_grad([sel_exp_para, sel_euler, sel_trans, sel_light]) + + optimizer_cur_batch = torch.optim.Adam( + [sel_exp_para, sel_euler, sel_trans, sel_light], lr=0.005 + ) + + sel_id_para = id_para.expand(batch_size, -1).detach() + sel_tex_para = tex_para.expand(batch_size, -1).detach() + + pre_num = 5 + + if i > 0: + pre_ids = np.arange(start_n - pre_num, start_n) + + for iter in range(50): + + geometry = model_3dmm.get_3dlandmarks( + sel_id_para, sel_exp_para, sel_euler, sel_trans, focal_length, cxy + ) + proj_geo = forward_transform(geometry, sel_euler, sel_trans, focal_length, cxy) + loss_lan = cal_lan_loss(proj_geo[:, :, :2], sel_lms.detach()) + loss_regexp = torch.mean(sel_exp_para * sel_exp_para) + + sel_geometry = model_3dmm.forward_geo(sel_id_para, sel_exp_para) + sel_texture = model_3dmm.forward_tex(sel_tex_para) + geometry = model_3dmm.forward_geo(sel_id_para, sel_exp_para) + rott_geo = forward_rott(geometry, sel_euler, sel_trans) + render_imgs = renderer( + rott_geo.to(device_render), + 
sel_texture.to(device_render), + sel_light.to(device_render), + ) + render_imgs = render_imgs.to(device_default) + + mask = (render_imgs[:, :, :, 3]).detach() > 0.0 + + loss_col = cal_col_loss(render_imgs[:, :, :, :3], sel_imgs.float(), mask) + + if i > 0: + geometry_lap = model_3dmm.forward_geo_sub( + id_para.expand(batch_size + pre_num, -1).detach(), + torch.cat((exp_para[pre_ids].detach(), sel_exp_para)), + model_3dmm.rigid_ids, + ) + rott_geo_lap = forward_rott( + geometry_lap, + torch.cat((euler_angle[pre_ids].detach(), sel_euler)), + torch.cat((trans[pre_ids].detach(), sel_trans)), + ) + loss_lap = cal_lap_loss( + [rott_geo_lap.reshape(rott_geo_lap.shape[0], -1).permute(1, 0)], [1.0] + ) + else: + geometry_lap = model_3dmm.forward_geo_sub( + id_para.expand(batch_size, -1).detach(), + sel_exp_para, + model_3dmm.rigid_ids, + ) + rott_geo_lap = forward_rott(geometry_lap, sel_euler, sel_trans) + loss_lap = cal_lap_loss( + [rott_geo_lap.reshape(rott_geo_lap.shape[0], -1).permute(1, 0)], [1.0] + ) + + + if iter > 30: + loss = loss_col * 0.5 + loss_lan * 1.5 + loss_lap * 100000 + loss_regexp * 1.0 + else: + loss = loss_col * 0.5 + loss_lan * 8 + loss_lap * 100000 + loss_regexp * 1.0 + + optimizer_cur_batch.zero_grad() + loss.backward() + optimizer_cur_batch.step() + + # if iter % 10 == 0: + # print( + # i, + # iter, + # loss_col.item(), + # loss_lan.item(), + # loss_lap.item(), + # loss_regexp.item(), + # ) + + print(str(i) + " of " + str(int((num_frames - 1) / batch_size + 1)) + " done") + + render_proj = sel_imgs.clone() + render_proj[mask] = render_imgs[mask][..., :3].byte() + + exp_para[sel_ids] = sel_exp_para.clone() + euler_angle[sel_ids] = sel_euler.clone() + trans[sel_ids] = sel_trans.clone() + light_para[sel_ids] = sel_light.clone() + +torch.save( + { + "id": id_para.detach().cpu(), + "exp": exp_para.detach().cpu(), + "euler": euler_angle.detach().cpu(), + "trans": trans.detach().cpu(), + "focal": focal_length.detach().cpu(), + }, + os.path.join(os.path.dirname(args.path), "track_params.pt"), +) + +print("params saved") diff --git a/data_utils/face_tracking/facemodel.py b/data_utils/face_tracking/facemodel.py new file mode 100644 index 0000000..6d19c90 --- /dev/null +++ b/data_utils/face_tracking/facemodel.py @@ -0,0 +1,153 @@ +import torch +import torch.nn as nn +import numpy as np +import os +from util import * + + +class Face_3DMM(nn.Module): + def __init__(self, modelpath, id_dim, exp_dim, tex_dim, point_num): + super(Face_3DMM, self).__init__() + # id_dim = 100 + # exp_dim = 79 + # tex_dim = 100 + self.point_num = point_num + DMM_info = np.load( + os.path.join(modelpath, "3DMM_info.npy"), allow_pickle=True + ).item() + base_id = DMM_info["b_shape"][:id_dim, :] + mu_id = DMM_info["mu_shape"] + base_exp = DMM_info["b_exp"][:exp_dim, :] + mu_exp = DMM_info["mu_exp"] + mu = mu_id + mu_exp + mu = mu.reshape(-1, 3) + for i in range(3): + mu[:, i] -= np.mean(mu[:, i]) + mu = mu.reshape(-1) + self.base_id = torch.as_tensor(base_id).cuda() / 100000.0 + self.base_exp = torch.as_tensor(base_exp).cuda() / 100000.0 + self.mu = torch.as_tensor(mu).cuda() / 100000.0 + base_tex = DMM_info["b_tex"][:tex_dim, :] + mu_tex = DMM_info["mu_tex"] + self.base_tex = torch.as_tensor(base_tex).cuda() + self.mu_tex = torch.as_tensor(mu_tex).cuda() + sig_id = DMM_info["sig_shape"][:id_dim] + sig_tex = DMM_info["sig_tex"][:tex_dim] + sig_exp = DMM_info["sig_exp"][:exp_dim] + self.sig_id = torch.as_tensor(sig_id).cuda() + self.sig_tex = torch.as_tensor(sig_tex).cuda() + self.sig_exp = 
torch.as_tensor(sig_exp).cuda() + + keys_info = np.load( + os.path.join(modelpath, "keys_info.npy"), allow_pickle=True + ).item() + self.keyinds = torch.as_tensor(keys_info["keyinds"]).cuda() + self.left_contours = torch.as_tensor(keys_info["left_contour"]).cuda() + self.right_contours = torch.as_tensor(keys_info["right_contour"]).cuda() + self.rigid_ids = torch.as_tensor(keys_info["rigid_ids"]).cuda() + + def get_3dlandmarks(self, id_para, exp_para, euler_angle, trans, focal_length, cxy): + id_para = id_para * self.sig_id + exp_para = exp_para * self.sig_exp + batch_size = id_para.shape[0] + num_per_contour = self.left_contours.shape[1] + left_contours_flat = self.left_contours.reshape(-1) + right_contours_flat = self.right_contours.reshape(-1) + sel_index = torch.cat( + ( + 3 * left_contours_flat.unsqueeze(1), + 3 * left_contours_flat.unsqueeze(1) + 1, + 3 * left_contours_flat.unsqueeze(1) + 2, + ), + dim=1, + ).reshape(-1) + left_geometry = ( + torch.mm(id_para, self.base_id[:, sel_index]) + + torch.mm(exp_para, self.base_exp[:, sel_index]) + + self.mu[sel_index] + ) + left_geometry = left_geometry.view(batch_size, -1, 3) + proj_x = forward_transform( + left_geometry, euler_angle, trans, focal_length, cxy + )[:, :, 0] + proj_x = proj_x.reshape(batch_size, 8, num_per_contour) + arg_min = proj_x.argmin(dim=2) + left_geometry = left_geometry.view(batch_size * 8, num_per_contour, 3) + left_3dlands = left_geometry[ + torch.arange(batch_size * 8), arg_min.view(-1), : + ].view(batch_size, 8, 3) + + sel_index = torch.cat( + ( + 3 * right_contours_flat.unsqueeze(1), + 3 * right_contours_flat.unsqueeze(1) + 1, + 3 * right_contours_flat.unsqueeze(1) + 2, + ), + dim=1, + ).reshape(-1) + right_geometry = ( + torch.mm(id_para, self.base_id[:, sel_index]) + + torch.mm(exp_para, self.base_exp[:, sel_index]) + + self.mu[sel_index] + ) + right_geometry = right_geometry.view(batch_size, -1, 3) + proj_x = forward_transform( + right_geometry, euler_angle, trans, focal_length, cxy + )[:, :, 0] + proj_x = proj_x.reshape(batch_size, 8, num_per_contour) + arg_max = proj_x.argmax(dim=2) + right_geometry = right_geometry.view(batch_size * 8, num_per_contour, 3) + right_3dlands = right_geometry[ + torch.arange(batch_size * 8), arg_max.view(-1), : + ].view(batch_size, 8, 3) + + sel_index = torch.cat( + ( + 3 * self.keyinds.unsqueeze(1), + 3 * self.keyinds.unsqueeze(1) + 1, + 3 * self.keyinds.unsqueeze(1) + 2, + ), + dim=1, + ).reshape(-1) + geometry = ( + torch.mm(id_para, self.base_id[:, sel_index]) + + torch.mm(exp_para, self.base_exp[:, sel_index]) + + self.mu[sel_index] + ) + lands_3d = geometry.view(-1, self.keyinds.shape[0], 3) + lands_3d[:, :8, :] = left_3dlands + lands_3d[:, 9:17, :] = right_3dlands + return lands_3d + + def forward_geo_sub(self, id_para, exp_para, sub_index): + id_para = id_para * self.sig_id + exp_para = exp_para * self.sig_exp + sel_index = torch.cat( + ( + 3 * sub_index.unsqueeze(1), + 3 * sub_index.unsqueeze(1) + 1, + 3 * sub_index.unsqueeze(1) + 2, + ), + dim=1, + ).reshape(-1) + geometry = ( + torch.mm(id_para, self.base_id[:, sel_index]) + + torch.mm(exp_para, self.base_exp[:, sel_index]) + + self.mu[sel_index] + ) + return geometry.reshape(-1, sub_index.shape[0], 3) + + def forward_geo(self, id_para, exp_para): + id_para = id_para * self.sig_id + exp_para = exp_para * self.sig_exp + geometry = ( + torch.mm(id_para, self.base_id) + + torch.mm(exp_para, self.base_exp) + + self.mu + ) + return geometry.reshape(-1, self.point_num, 3) + + def forward_tex(self, tex_para): + tex_para = 
tex_para * self.sig_tex + texture = torch.mm(tex_para, self.base_tex) + self.mu_tex + return texture.reshape(-1, self.point_num, 3) diff --git a/data_utils/face_tracking/geo_transform.py b/data_utils/face_tracking/geo_transform.py new file mode 100644 index 0000000..c5f29b8 --- /dev/null +++ b/data_utils/face_tracking/geo_transform.py @@ -0,0 +1,69 @@ +"""This module contains functions for geometry transform and camera projection""" +import torch +import torch.nn as nn +import numpy as np + + +def euler2rot(euler_angle): + batch_size = euler_angle.shape[0] + theta = euler_angle[:, 0].reshape(-1, 1, 1) + phi = euler_angle[:, 1].reshape(-1, 1, 1) + psi = euler_angle[:, 2].reshape(-1, 1, 1) + one = torch.ones((batch_size, 1, 1), dtype=torch.float32, device=euler_angle.device) + zero = torch.zeros( + (batch_size, 1, 1), dtype=torch.float32, device=euler_angle.device + ) + rot_x = torch.cat( + ( + torch.cat((one, zero, zero), 1), + torch.cat((zero, theta.cos(), theta.sin()), 1), + torch.cat((zero, -theta.sin(), theta.cos()), 1), + ), + 2, + ) + rot_y = torch.cat( + ( + torch.cat((phi.cos(), zero, -phi.sin()), 1), + torch.cat((zero, one, zero), 1), + torch.cat((phi.sin(), zero, phi.cos()), 1), + ), + 2, + ) + rot_z = torch.cat( + ( + torch.cat((psi.cos(), -psi.sin(), zero), 1), + torch.cat((psi.sin(), psi.cos(), zero), 1), + torch.cat((zero, zero, one), 1), + ), + 2, + ) + return torch.bmm(rot_x, torch.bmm(rot_y, rot_z)) + + +def rot_trans_geo(geometry, rot, trans): + rott_geo = torch.bmm(rot, geometry.permute(0, 2, 1)) + trans.view(-1, 3, 1) + return rott_geo.permute(0, 2, 1) + + +def euler_trans_geo(geometry, euler, trans): + rot = euler2rot(euler) + return rot_trans_geo(geometry, rot, trans) + + +def proj_geo(rott_geo, camera_para): + fx = camera_para[:, 0] + fy = camera_para[:, 0] + cx = camera_para[:, 1] + cy = camera_para[:, 2] + + X = rott_geo[:, :, 0] + Y = rott_geo[:, :, 1] + Z = rott_geo[:, :, 2] + + fxX = fx[:, None] * X + fyY = fy[:, None] * Y + + proj_x = -fxX / Z + cx[:, None] + proj_y = fyY / Z + cy[:, None] + + return torch.cat((proj_x[:, :, None], proj_y[:, :, None], Z[:, :, None]), 2) diff --git a/data_utils/face_tracking/render_3dmm.py b/data_utils/face_tracking/render_3dmm.py new file mode 100644 index 0000000..9e8c1cc --- /dev/null +++ b/data_utils/face_tracking/render_3dmm.py @@ -0,0 +1,202 @@ +import torch +import torch.nn as nn +import numpy as np +import os +from pytorch3d.structures import Meshes +from pytorch3d.renderer import ( + look_at_view_transform, + PerspectiveCameras, + FoVPerspectiveCameras, + PointLights, + DirectionalLights, + Materials, + RasterizationSettings, + MeshRenderer, + MeshRasterizer, + SoftPhongShader, + TexturesUV, + TexturesVertex, + blending, +) + +from pytorch3d.ops import interpolate_face_attributes + +from pytorch3d.renderer.blending import ( + BlendParams, + hard_rgb_blend, + sigmoid_alpha_blend, + softmax_rgb_blend, +) + + +class SoftSimpleShader(nn.Module): + """ + Per pixel lighting - the lighting model is applied using the interpolated + coordinates and normals for each pixel. The blending function returns the + soft aggregated color using all the faces per pixel. + + To use the default values, simply initialize the shader with the desired + device e.g. 
+ + """ + + def __init__( + self, device="cpu", cameras=None, lights=None, materials=None, blend_params=None + ): + super().__init__() + self.lights = lights if lights is not None else PointLights(device=device) + self.materials = ( + materials if materials is not None else Materials(device=device) + ) + self.cameras = cameras + self.blend_params = blend_params if blend_params is not None else BlendParams() + + def to(self, device): + # Manually move to device modules which are not subclasses of nn.Module + self.cameras = self.cameras.to(device) + self.materials = self.materials.to(device) + self.lights = self.lights.to(device) + return self + + def forward(self, fragments, meshes, **kwargs) -> torch.Tensor: + + texels = meshes.sample_textures(fragments) + blend_params = kwargs.get("blend_params", self.blend_params) + + cameras = kwargs.get("cameras", self.cameras) + if cameras is None: + msg = "Cameras must be specified either at initialization \ + or in the forward pass of SoftPhongShader" + raise ValueError(msg) + znear = kwargs.get("znear", getattr(cameras, "znear", 1.0)) + zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0)) + images = softmax_rgb_blend( + texels, fragments, blend_params, znear=znear, zfar=zfar + ) + return images + + +class Render_3DMM(nn.Module): + def __init__( + self, + focal=1015, + img_h=500, + img_w=500, + batch_size=1, + device=torch.device("cuda:0"), + ): + super(Render_3DMM, self).__init__() + + self.focal = focal + self.img_h = img_h + self.img_w = img_w + self.device = device + self.renderer = self.get_render(batch_size) + + dir_path = os.path.dirname(os.path.realpath(__file__)) + topo_info = np.load( + os.path.join(dir_path, "3DMM", "topology_info.npy"), allow_pickle=True + ).item() + self.tris = torch.as_tensor(topo_info["tris"]).to(self.device) + self.vert_tris = torch.as_tensor(topo_info["vert_tris"]).to(self.device) + + def compute_normal(self, geometry): + vert_1 = torch.index_select(geometry, 1, self.tris[:, 0]) + vert_2 = torch.index_select(geometry, 1, self.tris[:, 1]) + vert_3 = torch.index_select(geometry, 1, self.tris[:, 2]) + nnorm = torch.cross(vert_2 - vert_1, vert_3 - vert_1, 2) + tri_normal = nn.functional.normalize(nnorm, dim=2) + v_norm = tri_normal[:, self.vert_tris, :].sum(2) + vert_normal = v_norm / v_norm.norm(dim=2).unsqueeze(2) + return vert_normal + + def get_render(self, batch_size=1): + half_s = self.img_w * 0.5 + R, T = look_at_view_transform(10, 0, 0) + R = R.repeat(batch_size, 1, 1) + T = torch.zeros((batch_size, 3), dtype=torch.float32).to(self.device) + + cameras = FoVPerspectiveCameras( + device=self.device, + R=R, + T=T, + znear=0.01, + zfar=20, + fov=2 * np.arctan(self.img_w // 2 / self.focal) * 180.0 / np.pi, + ) + lights = PointLights( + device=self.device, + location=[[0.0, 0.0, 1e5]], + ambient_color=[[1, 1, 1]], + specular_color=[[0.0, 0.0, 0.0]], + diffuse_color=[[0.0, 0.0, 0.0]], + ) + sigma = 1e-4 + raster_settings = RasterizationSettings( + image_size=(self.img_h, self.img_w), + blur_radius=np.log(1.0 / 1e-4 - 1.0) * sigma / 18.0, + faces_per_pixel=2, + perspective_correct=False, + ) + blend_params = blending.BlendParams(background_color=[0, 0, 0]) + renderer = MeshRenderer( + rasterizer=MeshRasterizer(raster_settings=raster_settings, cameras=cameras), + shader=SoftSimpleShader( + lights=lights, blend_params=blend_params, cameras=cameras + ), + ) + return renderer.to(self.device) + + @staticmethod + def Illumination_layer(face_texture, norm, gamma): + + n_b, num_vertex, _ = face_texture.size() + n_v_full = 
n_b * num_vertex + gamma = gamma.view(-1, 3, 9).clone() + gamma[:, :, 0] += 0.8 + + gamma = gamma.permute(0, 2, 1) + + a0 = np.pi + a1 = 2 * np.pi / np.sqrt(3.0) + a2 = 2 * np.pi / np.sqrt(8.0) + c0 = 1 / np.sqrt(4 * np.pi) + c1 = np.sqrt(3.0) / np.sqrt(4 * np.pi) + c2 = 3 * np.sqrt(5.0) / np.sqrt(12 * np.pi) + d0 = 0.5 / np.sqrt(3.0) + + Y0 = torch.ones(n_v_full).to(gamma.device).float() * a0 * c0 + norm = norm.view(-1, 3) + nx, ny, nz = norm[:, 0], norm[:, 1], norm[:, 2] + arrH = [] + + arrH.append(Y0) + arrH.append(-a1 * c1 * ny) + arrH.append(a1 * c1 * nz) + arrH.append(-a1 * c1 * nx) + arrH.append(a2 * c2 * nx * ny) + arrH.append(-a2 * c2 * ny * nz) + arrH.append(a2 * c2 * d0 * (3 * nz.pow(2) - 1)) + arrH.append(-a2 * c2 * nx * nz) + arrH.append(a2 * c2 * 0.5 * (nx.pow(2) - ny.pow(2))) + + H = torch.stack(arrH, 1) + Y = H.view(n_b, num_vertex, 9) + lighting = Y.bmm(gamma) + + face_color = face_texture * lighting + return face_color + + def forward(self, rott_geometry, texture, diffuse_sh): + face_normal = self.compute_normal(rott_geometry) + face_color = self.Illumination_layer(texture, face_normal, diffuse_sh) + face_color = TexturesVertex(face_color) + mesh = Meshes( + rott_geometry, + self.tris.float().repeat(rott_geometry.shape[0], 1, 1), + face_color, + ) + rendered_img = self.renderer(mesh) + rendered_img = torch.clamp(rendered_img, 0, 255) + + return rendered_img diff --git a/data_utils/face_tracking/render_land.py b/data_utils/face_tracking/render_land.py new file mode 100644 index 0000000..b4bd7fe --- /dev/null +++ b/data_utils/face_tracking/render_land.py @@ -0,0 +1,192 @@ +import torch +import torch.nn as nn +import render_util +import geo_transform +import numpy as np + + +def compute_tri_normal(geometry, tris): + geometry = geometry.permute(0, 2, 1) + tri_1 = tris[:, 0] + tri_2 = tris[:, 1] + tri_3 = tris[:, 2] + + vert_1 = torch.index_select(geometry, 2, tri_1) + vert_2 = torch.index_select(geometry, 2, tri_2) + vert_3 = torch.index_select(geometry, 2, tri_3) + + nnorm = torch.cross(vert_2 - vert_1, vert_3 - vert_1, 1) + normal = nn.functional.normalize(nnorm).permute(0, 2, 1) + return normal + + +class Compute_normal_base(torch.autograd.Function): + @staticmethod + def forward(ctx, normal): + (normal_b,) = render_util.normal_base_forward(normal) + ctx.save_for_backward(normal) + return normal_b + + @staticmethod + def backward(ctx, grad_normal_b): + (normal,) = ctx.saved_tensors + (grad_normal,) = render_util.normal_base_backward(grad_normal_b, normal) + return grad_normal + + +class Normal_Base(torch.nn.Module): + def __init__(self): + super(Normal_Base, self).__init__() + + def forward(self, normal): + return Compute_normal_base.apply(normal) + + +def preprocess_render(geometry, euler, trans, cam, tris, vert_tris, ori_img): + point_num = geometry.shape[1] + rott_geo = geo_transform.euler_trans_geo(geometry, euler, trans) + proj_geo = geo_transform.proj_geo(rott_geo, cam) + rot_tri_normal = compute_tri_normal(rott_geo, tris) + rot_vert_normal = torch.index_select(rot_tri_normal, 1, vert_tris) + is_visible = -torch.bmm( + rot_vert_normal.reshape(-1, 1, 3), + nn.functional.normalize(rott_geo.reshape(-1, 3, 1)), + ).reshape(-1, point_num) + is_visible[is_visible < 0.01] = -1 + pixel_valid = torch.zeros( + (ori_img.shape[0], ori_img.shape[1] * ori_img.shape[2]), + dtype=torch.float32, + device=ori_img.device, + ) + return rott_geo, proj_geo, rot_tri_normal, is_visible, pixel_valid + + +class Render_Face(torch.autograd.Function): + @staticmethod + def forward( + ctx, 
proj_geo, texture, nbl, ori_img, is_visible, tri_inds, pixel_valid + ): + batch_size, h, w, _ = ori_img.shape + ori_img = ori_img.view(batch_size, -1, 3) + ori_size = torch.cat( + ( + torch.ones((batch_size, 1), dtype=torch.int32, device=ori_img.device) + * h, + torch.ones((batch_size, 1), dtype=torch.int32, device=ori_img.device) + * w, + ), + dim=1, + ).view(-1) + tri_index, tri_coord, render, real = render_util.render_face_forward( + proj_geo, ori_img, ori_size, texture, nbl, is_visible, tri_inds, pixel_valid + ) + ctx.save_for_backward( + ori_img, ori_size, proj_geo, texture, nbl, tri_inds, tri_index, tri_coord + ) + return render, real + + @staticmethod + def backward(ctx, grad_render, grad_real): + ( + ori_img, + ori_size, + proj_geo, + texture, + nbl, + tri_inds, + tri_index, + tri_coord, + ) = ctx.saved_tensors + grad_proj_geo, grad_texture, grad_nbl = render_util.render_face_backward( + grad_render, + grad_real, + ori_img, + ori_size, + proj_geo, + texture, + nbl, + tri_inds, + tri_index, + tri_coord, + ) + return grad_proj_geo, grad_texture, grad_nbl, None, None, None, None + + +class Render_RGB(nn.Module): + def __init__(self): + super(Render_RGB, self).__init__() + + def forward( + self, proj_geo, texture, nbl, ori_img, is_visible, tri_inds, pixel_valid + ): + return Render_Face.apply( + proj_geo, texture, nbl, ori_img, is_visible, tri_inds, pixel_valid + ) + + +def cal_land(proj_geo, is_visible, lands_info, land_num): + (land_index,) = render_util.update_contour(lands_info, is_visible, land_num) + proj_land = torch.index_select(proj_geo.reshape(-1, 3), 0, land_index)[ + :, :2 + ].reshape(-1, land_num, 2) + return proj_land + + +class Render_Land(nn.Module): + def __init__(self): + super(Render_Land, self).__init__() + lands_info = np.loadtxt("../data/3DMM/lands_info.txt", dtype=np.int32) + self.lands_info = torch.as_tensor(lands_info).cuda() + tris = np.loadtxt("../data/3DMM/tris.txt", dtype=np.int64) + self.tris = torch.as_tensor(tris).cuda() - 1 + vert_tris = np.loadtxt("../data/3DMM/vert_tris.txt", dtype=np.int64) + self.vert_tris = torch.as_tensor(vert_tris).cuda() + self.normal_baser = Normal_Base().cuda() + self.renderer = Render_RGB().cuda() + + def render_mesh(self, geometry, euler, trans, cam, ori_img, light): + batch_size, h, w, _ = ori_img.shape + ori_img = ori_img.view(batch_size, -1, 3) + ori_size = torch.cat( + ( + torch.ones((batch_size, 1), dtype=torch.int32, device=ori_img.device) + * h, + torch.ones((batch_size, 1), dtype=torch.int32, device=ori_img.device) + * w, + ), + dim=1, + ).view(-1) + rott_geo, proj_geo, rot_tri_normal, _, _ = preprocess_render( + geometry, euler, trans, cam, self.tris, self.vert_tris, ori_img + ) + tri_nb = self.normal_baser(rot_tri_normal.contiguous()) + nbl = torch.bmm( + tri_nb, (light.reshape(-1, 9, 3))[:, :, 0].unsqueeze(-1).repeat(1, 1, 3) + ) + texture = torch.ones_like(geometry) * 200 + (render,) = render_util.render_mesh( + proj_geo, ori_img, ori_size, texture, nbl, self.tris + ) + return render.view(batch_size, h, w, 3).byte() + + def cal_loss_rgb(self, geometry, euler, trans, cam, ori_img, light, texture, lands): + rott_geo, proj_geo, rot_tri_normal, is_visible, pixel_valid = preprocess_render( + geometry, euler, trans, cam, self.tris, self.vert_tris, ori_img + ) + tri_nb = self.normal_baser(rot_tri_normal.contiguous()) + nbl = torch.bmm(tri_nb, light.reshape(-1, 9, 3)) + render, real = self.renderer( + proj_geo, texture, nbl, ori_img, is_visible, self.tris, pixel_valid + ) + proj_land = cal_land(proj_geo, is_visible, 
self.lands_info, lands.shape[1]) + col_minus = torch.norm((render - real).reshape(-1, 3), dim=1).reshape( + ori_img.shape[0], -1 + ) + col_dis = torch.mean(col_minus * pixel_valid) / ( + torch.mean(pixel_valid) + 0.00001 + ) + land_dists = torch.norm((proj_land - lands).reshape(-1, 2), dim=1).reshape( + ori_img.shape[0], -1 + ) + lan_dis = torch.mean(land_dists) + return col_dis, lan_dis diff --git a/data_utils/face_tracking/util.py b/data_utils/face_tracking/util.py new file mode 100644 index 0000000..cc0f3d8 --- /dev/null +++ b/data_utils/face_tracking/util.py @@ -0,0 +1,109 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def compute_tri_normal(geometry, tris): + tri_1 = tris[:, 0] + tri_2 = tris[:, 1] + tri_3 = tris[:, 2] + vert_1 = torch.index_select(geometry, 1, tri_1) + vert_2 = torch.index_select(geometry, 1, tri_2) + vert_3 = torch.index_select(geometry, 1, tri_3) + nnorm = torch.cross(vert_2 - vert_1, vert_3 - vert_1, 2) + normal = nn.functional.normalize(nnorm) + return normal + + +def euler2rot(euler_angle): + batch_size = euler_angle.shape[0] + theta = euler_angle[:, 0].reshape(-1, 1, 1) + phi = euler_angle[:, 1].reshape(-1, 1, 1) + psi = euler_angle[:, 2].reshape(-1, 1, 1) + one = torch.ones(batch_size, 1, 1).to(euler_angle.device) + zero = torch.zeros(batch_size, 1, 1).to(euler_angle.device) + rot_x = torch.cat( + ( + torch.cat((one, zero, zero), 1), + torch.cat((zero, theta.cos(), theta.sin()), 1), + torch.cat((zero, -theta.sin(), theta.cos()), 1), + ), + 2, + ) + rot_y = torch.cat( + ( + torch.cat((phi.cos(), zero, -phi.sin()), 1), + torch.cat((zero, one, zero), 1), + torch.cat((phi.sin(), zero, phi.cos()), 1), + ), + 2, + ) + rot_z = torch.cat( + ( + torch.cat((psi.cos(), -psi.sin(), zero), 1), + torch.cat((psi.sin(), psi.cos(), zero), 1), + torch.cat((zero, zero, one), 1), + ), + 2, + ) + return torch.bmm(rot_x, torch.bmm(rot_y, rot_z)) + + +def rot_trans_pts(geometry, rot, trans): + rott_geo = torch.bmm(rot, geometry.permute(0, 2, 1)) + trans[:, :, None] + return rott_geo.permute(0, 2, 1) + + +def cal_lap_loss(tensor_list, weight_list): + lap_kernel = ( + torch.Tensor((-0.5, 1.0, -0.5)) + .unsqueeze(0) + .unsqueeze(0) + .float() + .to(tensor_list[0].device) + ) + loss_lap = 0 + for i in range(len(tensor_list)): + in_tensor = tensor_list[i] + in_tensor = in_tensor.view(-1, 1, in_tensor.shape[-1]) + out_tensor = F.conv1d(in_tensor, lap_kernel) + loss_lap += torch.mean(out_tensor ** 2) * weight_list[i] + return loss_lap + + +def proj_pts(rott_geo, focal_length, cxy): + cx, cy = cxy[0], cxy[1] + X = rott_geo[:, :, 0] + Y = rott_geo[:, :, 1] + Z = rott_geo[:, :, 2] + fxX = focal_length * X + fyY = focal_length * Y + proj_x = -fxX / Z + cx + proj_y = fyY / Z + cy + return torch.cat((proj_x[:, :, None], proj_y[:, :, None], Z[:, :, None]), 2) + + +def forward_rott(geometry, euler_angle, trans): + rot = euler2rot(euler_angle) + rott_geo = rot_trans_pts(geometry, rot, trans) + return rott_geo + + +def forward_transform(geometry, euler_angle, trans, focal_length, cxy): + rot = euler2rot(euler_angle) + rott_geo = rot_trans_pts(geometry, rot, trans) + proj_geo = proj_pts(rott_geo, focal_length, cxy) + return proj_geo + + +def cal_lan_loss(proj_lan, gt_lan): + return torch.mean((proj_lan - gt_lan) ** 2) + + +def cal_col_loss(pred_img, gt_img, img_mask): + pred_img = pred_img.float() + # loss = torch.sqrt(torch.sum(torch.square(pred_img - gt_img), 3))*img_mask/255 + loss = (torch.sum(torch.square(pred_img - gt_img), 3)) * img_mask / 255 + loss = 
torch.sum(loss, dim=(1, 2)) / torch.sum(img_mask, dim=(1, 2)) + loss = torch.mean(loss) + return loss diff --git a/data_utils/process.py b/data_utils/process.py new file mode 100644 index 0000000..65aaa2d --- /dev/null +++ b/data_utils/process.py @@ -0,0 +1,444 @@ +import os +import glob +import tqdm +import json +import argparse +import cv2 +import numpy as np + +def extract_audio(path, out_path, sample_rate=16000): + + print(f'[INFO] ===== extract audio from {path} to {out_path} =====') + cmd = f'ffmpeg -i {path} -f wav -ar {sample_rate} {out_path}' + os.system(cmd) + print(f'[INFO] ===== extracted audio =====') + + +def extract_audio_features(path, mode='wav2vec'): + + print(f'[INFO] ===== extract audio labels for {path} =====') + if mode == 'wav2vec': + cmd = f'python nerf/asr.py --wav {path} --save_feats' + else: # deepspeech + cmd = f'python data_utils/deepspeech_features/extract_ds_features.py --input {path}' + os.system(cmd) + print(f'[INFO] ===== extracted audio labels =====') + + + +def extract_images(path, out_path, fps=25): + + print(f'[INFO] ===== extract images from {path} to {out_path} =====') + cmd = f'ffmpeg -i {path} -vf fps={fps} -qmin 1 -q:v 1 -start_number 0 {os.path.join(out_path, "%d.jpg")}' + os.system(cmd) + print(f'[INFO] ===== extracted images =====') + + +def extract_semantics(ori_imgs_dir, parsing_dir): + + print(f'[INFO] ===== extract semantics from {ori_imgs_dir} to {parsing_dir} =====') + cmd = f'python data_utils/face_parsing/test.py --respath={parsing_dir} --imgpath={ori_imgs_dir}' + os.system(cmd) + print(f'[INFO] ===== extracted semantics =====') + + +def extract_landmarks(ori_imgs_dir): + + print(f'[INFO] ===== extract face landmarks from {ori_imgs_dir} =====') + + import face_alignment + fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False) + image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.jpg')) + for image_path in tqdm.tqdm(image_paths): + input = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) # [H, W, 3] + input = cv2.cvtColor(input, cv2.COLOR_BGR2RGB) + preds = fa.get_landmarks(input) + if len(preds) > 0: + lands = preds[0].reshape(-1, 2)[:,:2] + np.savetxt(image_path.replace('jpg', 'lms'), lands, '%f') + del fa + print(f'[INFO] ===== extracted face landmarks =====') + + +def extract_background(base_dir, ori_imgs_dir): + + print(f'[INFO] ===== extract background image from {ori_imgs_dir} =====') + + from sklearn.neighbors import NearestNeighbors + + image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.jpg')) + # only use 1/20 image_paths + image_paths = image_paths[::20] + # read one image to get H/W + tmp_image = cv2.imread(image_paths[0], cv2.IMREAD_UNCHANGED) # [H, W, 3] + h, w = tmp_image.shape[:2] + + # nearest neighbors + all_xys = np.mgrid[0:h, 0:w].reshape(2, -1).transpose() + distss = [] + for image_path in tqdm.tqdm(image_paths): + parse_img = cv2.imread(image_path.replace('ori_imgs', 'parsing').replace('.jpg', '.png')) + bg = (parse_img[..., 0] == 255) & (parse_img[..., 1] == 255) & (parse_img[..., 2] == 255) + fg_xys = np.stack(np.nonzero(~bg)).transpose(1, 0) + nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(fg_xys) + dists, _ = nbrs.kneighbors(all_xys) + distss.append(dists) + + distss = np.stack(distss) + max_dist = np.max(distss, 0) + max_id = np.argmax(distss, 0) + + bc_pixs = max_dist > 5 + bc_pixs_id = np.nonzero(bc_pixs) + bc_ids = max_id[bc_pixs] + + imgs = [] + num_pixs = distss.shape[1] + for image_path in image_paths: + img = cv2.imread(image_path) + imgs.append(img) + 
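+ # For every pixel, `max_dist` / `max_id` below record the sampled frame in which
+ # that pixel is farthest from any foreground (non-background) pixel; pixels whose
+ # best distance exceeds 5 px are taken as reliable background and copied from that
+ # frame, and the remaining pixels are filled by nearest-neighbor propagation from
+ # the reliable ones.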
imgs = np.stack(imgs).reshape(-1, num_pixs, 3) + + bc_img = np.zeros((h*w, 3), dtype=np.uint8) + bc_img[bc_pixs_id, :] = imgs[bc_ids, bc_pixs_id, :] + bc_img = bc_img.reshape(h, w, 3) + + max_dist = max_dist.reshape(h, w) + bc_pixs = max_dist > 5 + bg_xys = np.stack(np.nonzero(~bc_pixs)).transpose() + fg_xys = np.stack(np.nonzero(bc_pixs)).transpose() + nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(fg_xys) + distances, indices = nbrs.kneighbors(bg_xys) + bg_fg_xys = fg_xys[indices[:, 0]] + bc_img[bg_xys[:, 0], bg_xys[:, 1], :] = bc_img[bg_fg_xys[:, 0], bg_fg_xys[:, 1], :] + + cv2.imwrite(os.path.join(base_dir, 'bc.jpg'), bc_img) + + print(f'[INFO] ===== extracted background image =====') + + +def extract_torso_and_gt(base_dir, ori_imgs_dir): + + print(f'[INFO] ===== extract torso and gt images for {base_dir} =====') + + from scipy.ndimage import binary_erosion, binary_dilation + + # load bg + bg_image = cv2.imread(os.path.join(base_dir, 'bc.jpg'), cv2.IMREAD_UNCHANGED) + + image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.jpg')) + + for image_path in tqdm.tqdm(image_paths): + # read ori image + ori_image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) # [H, W, 3] + + # read semantics + seg = cv2.imread(image_path.replace('ori_imgs', 'parsing').replace('.jpg', '.png')) + head_part = (seg[..., 0] == 255) & (seg[..., 1] == 0) & (seg[..., 2] == 0) + neck_part = (seg[..., 0] == 0) & (seg[..., 1] == 255) & (seg[..., 2] == 0) + torso_part = (seg[..., 0] == 0) & (seg[..., 1] == 0) & (seg[..., 2] == 255) + bg_part = (seg[..., 0] == 255) & (seg[..., 1] == 255) & (seg[..., 2] == 255) + + # get gt image + gt_image = ori_image.copy() + gt_image[bg_part] = bg_image[bg_part] + cv2.imwrite(image_path.replace('ori_imgs', 'gt_imgs'), gt_image) + + # get torso image + torso_image = gt_image.copy() # rgb + torso_image[head_part] = bg_image[head_part] + torso_alpha = 255 * np.ones((gt_image.shape[0], gt_image.shape[1], 1), dtype=np.uint8) # alpha + + # torso part "vertical" in-painting... 
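+ # How the in-painting below works: for each torso column whose topmost torso
+ # pixel lies directly under a head pixel, the color of that top pixel is copied
+ # upward for L rows (slightly darkened row by row via the 0.98 falloff), so the
+ # area vacated by the head is covered when the torso image is used on its own.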
+ L = 8 + 1 + torso_coords = np.stack(np.nonzero(torso_part), axis=-1) # [M, 2] + # lexsort: sort 2D coords first by y then by x, + # ref: https://stackoverflow.com/questions/2706605/sorting-a-2d-numpy-array-by-multiple-axes + inds = np.lexsort((torso_coords[:, 0], torso_coords[:, 1])) + torso_coords = torso_coords[inds] + # choose the top pixel for each column + u, uid, ucnt = np.unique(torso_coords[:, 1], return_index=True, return_counts=True) + top_torso_coords = torso_coords[uid] # [m, 2] + # only keep top-is-head pixels + top_torso_coords_up = top_torso_coords.copy() - np.array([1, 0]) + mask = head_part[tuple(top_torso_coords_up.T)] + if mask.any(): + top_torso_coords = top_torso_coords[mask] + # get the color + top_torso_colors = gt_image[tuple(top_torso_coords.T)] # [m, 3] + # construct inpaint coords (vertically up, or minus in x) + inpaint_torso_coords = top_torso_coords[None].repeat(L, 0) # [L, m, 2] + inpaint_offsets = np.stack([-np.arange(L), np.zeros(L, dtype=np.int32)], axis=-1)[:, None] # [L, 1, 2] + inpaint_torso_coords += inpaint_offsets + inpaint_torso_coords = inpaint_torso_coords.reshape(-1, 2) # [Lm, 2] + inpaint_torso_colors = top_torso_colors[None].repeat(L, 0) # [L, m, 3] + darken_scaler = 0.98 ** np.arange(L).reshape(L, 1, 1) # [L, 1, 1] + inpaint_torso_colors = (inpaint_torso_colors * darken_scaler).reshape(-1, 3) # [Lm, 3] + # set color + torso_image[tuple(inpaint_torso_coords.T)] = inpaint_torso_colors + + inpaint_torso_mask = np.zeros_like(torso_image[..., 0]).astype(bool) + inpaint_torso_mask[tuple(inpaint_torso_coords.T)] = True + else: + inpaint_torso_mask = None + + + # neck part "vertical" in-painting... + push_down = 4 + L = 48 + push_down + 1 + + neck_part = binary_dilation(neck_part, structure=np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=bool), iterations=3) + + neck_coords = np.stack(np.nonzero(neck_part), axis=-1) # [M, 2] + # lexsort: sort 2D coords first by y then by x, + # ref: https://stackoverflow.com/questions/2706605/sorting-a-2d-numpy-array-by-multiple-axes + inds = np.lexsort((neck_coords[:, 0], neck_coords[:, 1])) + neck_coords = neck_coords[inds] + # choose the top pixel for each column + u, uid, ucnt = np.unique(neck_coords[:, 1], return_index=True, return_counts=True) + top_neck_coords = neck_coords[uid] # [m, 2] + # only keep top-is-head pixels + top_neck_coords_up = top_neck_coords.copy() - np.array([1, 0]) + mask = head_part[tuple(top_neck_coords_up.T)] + + top_neck_coords = top_neck_coords[mask] + # push these top down for 4 pixels to make the neck inpainting more natural... + offset_down = np.minimum(ucnt[mask] - 1, push_down) + top_neck_coords += np.stack([offset_down, np.zeros_like(offset_down)], axis=-1) + # get the color + top_neck_colors = gt_image[tuple(top_neck_coords.T)] # [m, 3] + # construct inpaint coords (vertically up, or minus in x) + inpaint_neck_coords = top_neck_coords[None].repeat(L, 0) # [L, m, 2] + inpaint_offsets = np.stack([-np.arange(L), np.zeros(L, dtype=np.int32)], axis=-1)[:, None] # [L, 1, 2] + inpaint_neck_coords += inpaint_offsets + inpaint_neck_coords = inpaint_neck_coords.reshape(-1, 2) # [Lm, 2] + inpaint_neck_colors = top_neck_colors[None].repeat(L, 0) # [L, m, 3] + darken_scaler = 0.98 ** np.arange(L).reshape(L, 1, 1) # [L, 1, 1] + inpaint_neck_colors = (inpaint_neck_colors * darken_scaler).reshape(-1, 3) # [Lm, 3] + # set color + torso_image[tuple(inpaint_neck_coords.T)] = inpaint_neck_colors + + # apply blurring to the inpaint area to avoid vertical-line artifects... 
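+ # Because each in-painted neck column repeats a single, gradually darkened color,
+ # neighbouring columns can differ sharply; the 5x5 Gaussian blur below is computed
+ # on a copy of the torso image and written back only where `inpaint_mask` is set,
+ # leaving the rest of the torso untouched.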
+ inpaint_mask = np.zeros_like(torso_image[..., 0]).astype(bool) + inpaint_mask[tuple(inpaint_neck_coords.T)] = True + + blur_img = torso_image.copy() + blur_img = cv2.GaussianBlur(blur_img, (5, 5), cv2.BORDER_DEFAULT) + + torso_image[inpaint_mask] = blur_img[inpaint_mask] + + # set mask + mask = (neck_part | torso_part | inpaint_mask) + if inpaint_torso_mask is not None: + mask = mask | inpaint_torso_mask + torso_image[~mask] = 0 + torso_alpha[~mask] = 0 + + cv2.imwrite(image_path.replace('ori_imgs', 'torso_imgs').replace('.jpg', '.png'), np.concatenate([torso_image, torso_alpha], axis=-1)) + + print(f'[INFO] ===== extracted torso and gt images =====') + + +def face_tracking(ori_imgs_dir): + + print(f'[INFO] ===== perform face tracking =====') + + image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.jpg')) + + # read one image to get H/W + tmp_image = cv2.imread(image_paths[0], cv2.IMREAD_UNCHANGED) # [H, W, 3] + h, w = tmp_image.shape[:2] + + cmd = f'python data_utils/face_tracking/face_tracker.py --path={ori_imgs_dir} --img_h={h} --img_w={w} --frame_num={len(image_paths)}' + + os.system(cmd) + + print(f'[INFO] ===== finished face tracking =====') + + +def save_transforms(base_dir, ori_imgs_dir): + print(f'[INFO] ===== save transforms =====') + + import torch + + image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.jpg')) + + # read one image to get H/W + tmp_image = cv2.imread(image_paths[0], cv2.IMREAD_UNCHANGED) # [H, W, 3] + h, w = tmp_image.shape[:2] + + params_dict = torch.load(os.path.join(base_dir, 'track_params.pt')) + focal_len = params_dict['focal'] + euler_angle = params_dict['euler'] + trans = params_dict['trans'] / 10.0 + valid_num = euler_angle.shape[0] + + def euler2rot(euler_angle): + batch_size = euler_angle.shape[0] + theta = euler_angle[:, 0].reshape(-1, 1, 1) + phi = euler_angle[:, 1].reshape(-1, 1, 1) + psi = euler_angle[:, 2].reshape(-1, 1, 1) + one = torch.ones((batch_size, 1, 1), dtype=torch.float32, device=euler_angle.device) + zero = torch.zeros((batch_size, 1, 1), dtype=torch.float32, device=euler_angle.device) + rot_x = torch.cat(( + torch.cat((one, zero, zero), 1), + torch.cat((zero, theta.cos(), theta.sin()), 1), + torch.cat((zero, -theta.sin(), theta.cos()), 1), + ), 2) + rot_y = torch.cat(( + torch.cat((phi.cos(), zero, -phi.sin()), 1), + torch.cat((zero, one, zero), 1), + torch.cat((phi.sin(), zero, phi.cos()), 1), + ), 2) + rot_z = torch.cat(( + torch.cat((psi.cos(), -psi.sin(), zero), 1), + torch.cat((psi.sin(), psi.cos(), zero), 1), + torch.cat((zero, zero, one), 1) + ), 2) + return torch.bmm(rot_x, torch.bmm(rot_y, rot_z)) + + + # train_val_split = int(valid_num*0.5) + # train_val_split = valid_num - 25 * 20 # take the last 20s as valid set. 
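+ # Split used here: the first 10/11 of the tracked frames form the training set and
+ # the last 1/11 is held out for validation; the commented-out lines above show the
+ # alternative 50/50 and "last 20 seconds" splits.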
+ train_val_split = int(valid_num * 10 / 11) + + train_ids = torch.arange(0, train_val_split) + val_ids = torch.arange(train_val_split, valid_num) + + rot = euler2rot(euler_angle) + rot_inv = rot.permute(0, 2, 1) + trans_inv = -torch.bmm(rot_inv, trans.unsqueeze(2)) + + pose = torch.eye(4, dtype=torch.float32) + save_ids = ['train', 'val'] + train_val_ids = [train_ids, val_ids] + mean_z = -float(torch.mean(trans[:, 2]).item()) + + for split in range(2): + transform_dict = dict() + transform_dict['focal_len'] = float(focal_len[0]) + transform_dict['cx'] = float(w/2.0) + transform_dict['cy'] = float(h/2.0) + transform_dict['frames'] = [] + ids = train_val_ids[split] + save_id = save_ids[split] + + for i in ids: + i = i.item() + frame_dict = dict() + frame_dict['img_id'] = i + frame_dict['aud_id'] = i + + pose[:3, :3] = rot_inv[i] + pose[:3, 3] = trans_inv[i, :, 0] + + frame_dict['transform_matrix'] = pose.numpy().tolist() + + transform_dict['frames'].append(frame_dict) + + with open(os.path.join(base_dir, 'transforms_' + save_id + '.json'), 'w') as fp: + json.dump(transform_dict, fp, indent=2, separators=(',', ': ')) + + print(f'[INFO] ===== finished saving transforms =====') + + + +def extract_torso_train(base_dir, ori_imgs_dir): + + print(f'[INFO] ===== extract training torso gt images for {base_dir} =====') + + # load bg + bg_image = cv2.imread(os.path.join(base_dir, 'bc.jpg'), cv2.IMREAD_UNCHANGED) + + image_paths = glob.glob(os.path.join(ori_imgs_dir, '*.jpg')) + + for image_path in tqdm.tqdm(image_paths): + # read ori image + ori_image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) # [H, W, 3] + + # read semantics + seg = cv2.imread(image_path.replace('ori_imgs', 'parsing').replace('.jpg', '.png')) + head_part = (seg[..., 0] == 255) & (seg[..., 1] == 0) & (seg[..., 2] == 0) + neck_part = (seg[..., 0] == 0) & (seg[..., 1] == 255) & (seg[..., 2] == 0) + torso_part = (seg[..., 0] == 0) & (seg[..., 1] == 0) & (seg[..., 2] == 255) + bg_part = (seg[..., 0] == 255) & (seg[..., 1] == 255) & (seg[..., 2] == 255) + + # get gt image + gt_image = ori_image.copy() + gt_image[bg_part] = bg_image[bg_part] + cv2.imwrite(image_path.replace('ori_imgs', 'gt_imgs'), gt_image) + + # get torso image + torso_image = gt_image.copy() # rgb + torso_image[head_part] = bg_image[head_part] + torso_alpha = 255 * np.ones((gt_image.shape[0], gt_image.shape[1], 1), dtype=np.uint8) # alpha + torso_alpha[head_part] = 0 + torso_alpha[bg_part] = 0 + cv2.imwrite(image_path.replace('ori_imgs', 'torso_imgs_train').replace('.jpg', '.png'), np.concatenate([torso_image, torso_alpha], axis=-1)) + + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('path', type=str, help="path to video file") + parser.add_argument('--task', type=int, default=-1, help="-1 means all") + parser.add_argument('--asr', type=str, default='wav2vec', help="wav2vec or deepspeech") + + opt = parser.parse_args() + + base_dir = os.path.dirname(opt.path) + + wav_path = os.path.join(base_dir, 'aud.wav') + ori_imgs_dir = os.path.join(base_dir, 'ori_imgs') + parsing_dir = os.path.join(base_dir, 'parsing') + gt_imgs_dir = os.path.join(base_dir, 'gt_imgs') + torso_imgs_dir = os.path.join(base_dir, 'torso_imgs') + torso_imgs_train_dir = os.path.join(base_dir, 'torso_imgs_train') + + os.makedirs(ori_imgs_dir, exist_ok=True) + os.makedirs(parsing_dir, exist_ok=True) + os.makedirs(gt_imgs_dir, exist_ok=True) + os.makedirs(torso_imgs_dir, exist_ok=True) + os.makedirs(torso_imgs_train_dir, exist_ok=True) + + + # extract audio + if 
opt.task == -1 or opt.task == 1: + extract_audio(opt.path, wav_path) + + # extract audio features + if opt.task == -1 or opt.task == 2: + extract_audio_features(wav_path, mode=opt.asr) + + # extract images + if opt.task == -1 or opt.task == 3: + extract_images(opt.path, ori_imgs_dir) + + # face parsing + if opt.task == -1 or opt.task == 4: + extract_semantics(ori_imgs_dir, parsing_dir) + + # extract bg + if opt.task == -1 or opt.task == 5: + extract_background(base_dir, ori_imgs_dir) + + # extract torso images and gt_images + if opt.task == -1 or opt.task == 6: + extract_torso_and_gt(base_dir, ori_imgs_dir) + + # extract face landmarks + if opt.task == -1 or opt.task == 7: + extract_landmarks(ori_imgs_dir) + + # face tracking + if opt.task == -1 or opt.task == 8: + face_tracking(ori_imgs_dir) + + # save transforms.json + if opt.task == -1 or opt.task == 9: + save_transforms(base_dir, ori_imgs_dir) + + if opt.task == -1 or opt.task == 10: + extract_torso_train(base_dir, ori_imgs_dir) + diff --git a/encoding.py b/encoding.py new file mode 100644 index 0000000..c700b47 --- /dev/null +++ b/encoding.py @@ -0,0 +1,38 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def get_encoder(encoding, input_dim=3, + multires=6, + degree=4, + num_levels=16, level_dim=2, base_resolution=16, log2_hashmap_size=19, desired_resolution=2048, align_corners=False, + **kwargs): + + if encoding == 'None': + return lambda x, **kwargs: x, input_dim + + elif encoding == 'frequency': + from freqencoder import FreqEncoder + encoder = FreqEncoder(input_dim=input_dim, degree=multires) + + elif encoding == 'spherical_harmonics': + from shencoder import SHEncoder + encoder = SHEncoder(input_dim=input_dim, degree=degree) + + elif encoding == 'hashgrid': + from gridencoder import GridEncoder + encoder = GridEncoder(input_dim=input_dim, num_levels=num_levels, level_dim=level_dim, base_resolution=base_resolution, log2_hashmap_size=log2_hashmap_size, desired_resolution=desired_resolution, gridtype='hash', align_corners=align_corners) + + elif encoding == 'tiledgrid': + from gridencoder import GridEncoder + encoder = GridEncoder(input_dim=input_dim, num_levels=num_levels, level_dim=level_dim, base_resolution=base_resolution, log2_hashmap_size=log2_hashmap_size, desired_resolution=desired_resolution, gridtype='tiled', align_corners=align_corners) + + elif encoding == 'ash': + from ashencoder import AshEncoder + encoder = AshEncoder(input_dim=input_dim, output_dim=16, log2_hashmap_size=log2_hashmap_size, resolution=desired_resolution) + + else: + raise NotImplementedError('Unknown encoding mode, choose from [None, frequency, spherical_harmonics, hashgrid, tiledgrid]') + + return encoder, encoder.output_dim \ No newline at end of file diff --git a/freqencoder/__init__.py b/freqencoder/__init__.py new file mode 100644 index 0000000..69ec49c --- /dev/null +++ b/freqencoder/__init__.py @@ -0,0 +1 @@ +from .freq import FreqEncoder \ No newline at end of file diff --git a/freqencoder/backend.py b/freqencoder/backend.py new file mode 100644 index 0000000..a89e351 --- /dev/null +++ b/freqencoder/backend.py @@ -0,0 +1,41 @@ +import os +from torch.utils.cpp_extension import load + +_src_path = os.path.dirname(os.path.abspath(__file__)) + +nvcc_flags = [ + '-O3', '-std=c++14', + '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', + '-use_fast_math' +] + +if os.name == "posix": + c_flags = ['-O3', '-std=c++14'] +elif os.name == "nt": + c_flags = ['/O2', '/std:c++17'] + + 
# find cl.exe + def find_cl_path(): + import glob + for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: + paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) + if paths: + return paths[0] + + # If cl.exe is not on path, try to find it. + if os.system("where cl.exe >nul 2>nul") != 0: + cl_path = find_cl_path() + if cl_path is None: + raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") + os.environ["PATH"] += ";" + cl_path + +_backend = load(name='_freqencoder', + extra_cflags=c_flags, + extra_cuda_cflags=nvcc_flags, + sources=[os.path.join(_src_path, 'src', f) for f in [ + 'freqencoder.cu', + 'bindings.cpp', + ]], + ) + +__all__ = ['_backend'] \ No newline at end of file diff --git a/freqencoder/freq.py b/freqencoder/freq.py new file mode 100644 index 0000000..05179f1 --- /dev/null +++ b/freqencoder/freq.py @@ -0,0 +1,77 @@ +import numpy as np + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + +try: + import _freqencoder as _backend +except ImportError: + from .backend import _backend + + +class _freq_encoder(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # force float32 for better precision + def forward(ctx, inputs, degree, output_dim): + # inputs: [B, input_dim], float + # RETURN: [B, F], float + + if not inputs.is_cuda: inputs = inputs.cuda() + inputs = inputs.contiguous() + + B, input_dim = inputs.shape # batch size, coord dim + + outputs = torch.empty(B, output_dim, dtype=inputs.dtype, device=inputs.device) + + _backend.freq_encode_forward(inputs, B, input_dim, degree, output_dim, outputs) + + ctx.save_for_backward(inputs, outputs) + ctx.dims = [B, input_dim, degree, output_dim] + + return outputs + + @staticmethod + #@once_differentiable + @custom_bwd + def backward(ctx, grad): + # grad: [B, C * C] + + grad = grad.contiguous() + inputs, outputs = ctx.saved_tensors + B, input_dim, degree, output_dim = ctx.dims + + grad_inputs = torch.zeros_like(inputs) + _backend.freq_encode_backward(grad, outputs, B, input_dim, degree, output_dim, grad_inputs) + + return grad_inputs, None, None + + +freq_encode = _freq_encoder.apply + + +class FreqEncoder(nn.Module): + def __init__(self, input_dim=3, degree=4): + super().__init__() + + self.input_dim = input_dim + self.degree = degree + self.output_dim = input_dim + input_dim * 2 * degree + + def __repr__(self): + return f"FreqEncoder: input_dim={self.input_dim} degree={self.degree} output_dim={self.output_dim}" + + def forward(self, inputs, **kwargs): + # inputs: [..., input_dim] + # return: [..., ] + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.reshape(-1, self.input_dim) + + outputs = freq_encode(inputs, self.degree, self.output_dim) + + outputs = outputs.reshape(prefix_shape + [self.output_dim]) + + return outputs \ No newline at end of file diff --git a/freqencoder/setup.py b/freqencoder/setup.py new file mode 100644 index 0000000..c9bb873 --- /dev/null +++ b/freqencoder/setup.py @@ -0,0 +1,51 @@ +import os +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +_src_path = os.path.dirname(os.path.abspath(__file__)) + +nvcc_flags = [ + '-O3', '-std=c++14', + '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', + '-use_fast_math' +] + +if 
os.name == "posix": + c_flags = ['-O3', '-std=c++14'] +elif os.name == "nt": + c_flags = ['/O2', '/std:c++17'] + + # find cl.exe + def find_cl_path(): + import glob + for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: + paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) + if paths: + return paths[0] + + # If cl.exe is not on path, try to find it. + if os.system("where cl.exe >nul 2>nul") != 0: + cl_path = find_cl_path() + if cl_path is None: + raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") + os.environ["PATH"] += ";" + cl_path + +setup( + name='freqencoder', # package name, import this to use python API + ext_modules=[ + CUDAExtension( + name='_freqencoder', # extension name, import this to use CUDA API + sources=[os.path.join(_src_path, 'src', f) for f in [ + 'freqencoder.cu', + 'bindings.cpp', + ]], + extra_compile_args={ + 'cxx': c_flags, + 'nvcc': nvcc_flags, + } + ), + ], + cmdclass={ + 'build_ext': BuildExtension, + } +) \ No newline at end of file diff --git a/freqencoder/src/bindings.cpp b/freqencoder/src/bindings.cpp new file mode 100644 index 0000000..dc48bd0 --- /dev/null +++ b/freqencoder/src/bindings.cpp @@ -0,0 +1,8 @@ +#include + +#include "freqencoder.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("freq_encode_forward", &freq_encode_forward, "freq encode forward (CUDA)"); + m.def("freq_encode_backward", &freq_encode_backward, "freq encode backward (CUDA)"); +} \ No newline at end of file diff --git a/freqencoder/src/freqencoder.cu b/freqencoder/src/freqencoder.cu new file mode 100644 index 0000000..e1e0e89 --- /dev/null +++ b/freqencoder/src/freqencoder.cu @@ -0,0 +1,129 @@ +#include + +#include +#include +#include + +#include +#include + +#include +#include + +#include + + +#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor") +#define CHECK_IS_INT(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, #x " must be an int tensor") +#define CHECK_IS_FLOATING(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Float || x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::Double, #x " must be a floating tensor") + +inline constexpr __device__ float PI() { return 3.141592653589793f; } + +template +__host__ __device__ T div_round_up(T val, T divisor) { + return (val + divisor - 1) / divisor; +} + +// inputs: [B, D] +// outputs: [B, C], C = D + D * deg * 2 +__global__ void kernel_freq( + const float * __restrict__ inputs, + uint32_t B, uint32_t D, uint32_t deg, uint32_t C, + float * outputs +) { + // parallel on per-element + const uint32_t t = threadIdx.x + blockIdx.x * blockDim.x; + if (t >= B * C) return; + + // get index + const uint32_t b = t / C; + const uint32_t c = t - b * C; // t % C; + + // locate + inputs += b * D; + outputs += t; + + // write self + if (c < D) { + outputs[0] = inputs[c]; + // write freq + } else { + const uint32_t col = c / D - 1; + const uint32_t d = c % D; + const uint32_t freq = col / 2; + const float phase_shift = (col % 2) * (PI() / 2); + outputs[0] = __sinf(scalbnf(inputs[d], freq) + phase_shift); + } +} + +// grad: [B, C], C = D + D * deg * 2 +// outputs: [B, C] +// grad_inputs: [B, D] +__global__ void kernel_freq_backward( + const float * __restrict__ grad, + const float * __restrict__ outputs, + uint32_t B, uint32_t D, uint32_t 
deg, uint32_t C, + float * grad_inputs +) { + // parallel on per-element + const uint32_t t = threadIdx.x + blockIdx.x * blockDim.x; + if (t >= B * D) return; + + const uint32_t b = t / D; + const uint32_t d = t - b * D; // t % D; + + // locate + grad += b * C; + outputs += b * C; + grad_inputs += t; + + // register + float result = grad[d]; + grad += D; + outputs += D; + + for (uint32_t f = 0; f < deg; f++) { + result += scalbnf(1.0f, f) * (grad[d] * outputs[D + d] - grad[D + d] * outputs[d]); + grad += 2 * D; + outputs += 2 * D; + } + + // write + grad_inputs[0] = result; +} + + +void freq_encode_forward(at::Tensor inputs, const uint32_t B, const uint32_t D, const uint32_t deg, const uint32_t C, at::Tensor outputs) { + CHECK_CUDA(inputs); + CHECK_CUDA(outputs); + + CHECK_CONTIGUOUS(inputs); + CHECK_CONTIGUOUS(outputs); + + CHECK_IS_FLOATING(inputs); + CHECK_IS_FLOATING(outputs); + + static constexpr uint32_t N_THREADS = 128; + + kernel_freq<<>>(inputs.data_ptr(), B, D, deg, C, outputs.data_ptr()); +} + + +void freq_encode_backward(at::Tensor grad, at::Tensor outputs, const uint32_t B, const uint32_t D, const uint32_t deg, const uint32_t C, at::Tensor grad_inputs) { + CHECK_CUDA(grad); + CHECK_CUDA(outputs); + CHECK_CUDA(grad_inputs); + + CHECK_CONTIGUOUS(grad); + CHECK_CONTIGUOUS(outputs); + CHECK_CONTIGUOUS(grad_inputs); + + CHECK_IS_FLOATING(grad); + CHECK_IS_FLOATING(outputs); + CHECK_IS_FLOATING(grad_inputs); + + static constexpr uint32_t N_THREADS = 128; + + kernel_freq_backward<<>>(grad.data_ptr(), outputs.data_ptr(), B, D, deg, C, grad_inputs.data_ptr()); +} \ No newline at end of file diff --git a/freqencoder/src/freqencoder.h b/freqencoder/src/freqencoder.h new file mode 100644 index 0000000..cc420ee --- /dev/null +++ b/freqencoder/src/freqencoder.h @@ -0,0 +1,10 @@ +# pragma once + +#include +#include + +// _backend.freq_encode_forward(inputs, B, input_dim, degree, output_dim, outputs) +void freq_encode_forward(at::Tensor inputs, const uint32_t B, const uint32_t D, const uint32_t deg, const uint32_t C, at::Tensor outputs); + +// _backend.freq_encode_backward(grad, outputs, B, input_dim, degree, output_dim, grad_inputs) +void freq_encode_backward(at::Tensor grad, at::Tensor outputs, const uint32_t B, const uint32_t D, const uint32_t deg, const uint32_t C, at::Tensor grad_inputs); \ No newline at end of file diff --git a/gridencoder/__init__.py b/gridencoder/__init__.py new file mode 100644 index 0000000..f1476ce --- /dev/null +++ b/gridencoder/__init__.py @@ -0,0 +1 @@ +from .grid import GridEncoder \ No newline at end of file diff --git a/gridencoder/backend.py b/gridencoder/backend.py new file mode 100644 index 0000000..64a39ff --- /dev/null +++ b/gridencoder/backend.py @@ -0,0 +1,40 @@ +import os +from torch.utils.cpp_extension import load + +_src_path = os.path.dirname(os.path.abspath(__file__)) + +nvcc_flags = [ + '-O3', '-std=c++14', + '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', +] + +if os.name == "posix": + c_flags = ['-O3', '-std=c++14', '-finput-charset=UTF-8'] +elif os.name == "nt": + c_flags = ['/O2', '/std:c++17', '/finput-charset=UTF-8'] + + # find cl.exe + def find_cl_path(): + import glob + for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: + paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) + if paths: + return paths[0] + + # If cl.exe is not on path, try to find it. 
+ if os.system("where cl.exe >nul 2>nul") != 0: + cl_path = find_cl_path() + if cl_path is None: + raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") + os.environ["PATH"] += ";" + cl_path + +_backend = load(name='_grid_encoder', + extra_cflags=c_flags, + extra_cuda_cflags=nvcc_flags, + sources=[os.path.join(_src_path, 'src', f) for f in [ + 'gridencoder.cu', + 'bindings.cpp', + ]], + ) + +__all__ = ['_backend'] \ No newline at end of file diff --git a/gridencoder/grid.py b/gridencoder/grid.py new file mode 100644 index 0000000..8536992 --- /dev/null +++ b/gridencoder/grid.py @@ -0,0 +1,155 @@ +import numpy as np + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.cuda.amp import custom_bwd, custom_fwd + +try: + import _gridencoder as _backend +except ImportError: + from .backend import _backend + +_gridtype_to_id = { + 'hash': 0, + 'tiled': 1, +} + +class _grid_encode(Function): + @staticmethod + @custom_fwd + def forward(ctx, inputs, embeddings, offsets, per_level_scale, base_resolution, calc_grad_inputs=False, gridtype=0, align_corners=False): + # inputs: [B, D], float in [0, 1] + # embeddings: [sO, C], float + # offsets: [L + 1], int + # RETURN: [B, F], float + + inputs = inputs.float().contiguous() + + B, D = inputs.shape # batch size, coord dim + L = offsets.shape[0] - 1 # level + C = embeddings.shape[1] # embedding dim for each level + S = np.log2(per_level_scale) # resolution multiplier at each level, apply log2 for later CUDA exp2f + H = base_resolution # base resolution + + # manually handle autocast (only use half precision embeddings, inputs must be float for enough precision) + # if C % 2 != 0, force float, since half for atomicAdd is very slow. 
+        if torch.is_autocast_enabled() and C % 2 == 0:
+            embeddings = embeddings.to(torch.half)
+
+        # L first, optimize cache for cuda kernel, but needs an extra permute later
+        outputs = torch.empty(L, B, C, device=inputs.device, dtype=embeddings.dtype)
+
+        if calc_grad_inputs:
+            dy_dx = torch.empty(B, L * D * C, device=inputs.device, dtype=embeddings.dtype)
+        else:
+            dy_dx = None
+
+        _backend.grid_encode_forward(inputs, embeddings, offsets, outputs, B, D, C, L, S, H, dy_dx, gridtype, align_corners)
+
+        # permute back to [B, L * C]
+        outputs = outputs.permute(1, 0, 2).reshape(B, L * C)
+
+        ctx.save_for_backward(inputs, embeddings, offsets, dy_dx)
+        ctx.dims = [B, D, C, L, S, H, gridtype]
+        ctx.align_corners = align_corners
+
+        return outputs
+
+    @staticmethod
+    #@once_differentiable
+    @custom_bwd
+    def backward(ctx, grad):
+
+        inputs, embeddings, offsets, dy_dx = ctx.saved_tensors
+        B, D, C, L, S, H, gridtype = ctx.dims
+        align_corners = ctx.align_corners
+
+        # grad: [B, L * C] --> [L, B, C]
+        grad = grad.view(B, L, C).permute(1, 0, 2).contiguous()
+
+        grad_embeddings = torch.zeros_like(embeddings)
+
+        if dy_dx is not None:
+            grad_inputs = torch.zeros_like(inputs, dtype=embeddings.dtype)
+        else:
+            grad_inputs = None
+
+        _backend.grid_encode_backward(grad, inputs, embeddings, offsets, grad_embeddings, B, D, C, L, S, H, dy_dx, grad_inputs, gridtype, align_corners)
+
+        if dy_dx is not None:
+            grad_inputs = grad_inputs.to(inputs.dtype)
+
+        return grad_inputs, grad_embeddings, None, None, None, None, None, None
+
+
+grid_encode = _grid_encode.apply
+
+
+class GridEncoder(nn.Module):
+    def __init__(self, input_dim=3, num_levels=16, level_dim=2, per_level_scale=2, base_resolution=16, log2_hashmap_size=19, desired_resolution=None, gridtype='hash', align_corners=False):
+        super().__init__()
+
+        # the finest resolution desired at the last level; if provided, this overrides per_level_scale
+        if desired_resolution is not None:
+            per_level_scale = np.exp2(np.log2(desired_resolution / base_resolution) / (num_levels - 1))
+
+        self.input_dim = input_dim # coord dims, 2 or 3
+        self.num_levels = num_levels # num levels, each level multiplies the resolution by per_level_scale
+        self.level_dim = level_dim # encode channels per level
+        self.per_level_scale = per_level_scale # multiply resolution by this scale at each level.
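+        # e.g. with base_resolution=16, desired_resolution=2048 and num_levels=16:
+        # per_level_scale = 2 ** (log2(2048 / 16) / 15) ~= 1.382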
+ self.log2_hashmap_size = log2_hashmap_size + self.base_resolution = base_resolution + self.output_dim = num_levels * level_dim + self.gridtype = gridtype + self.gridtype_id = _gridtype_to_id[gridtype] # "tiled" or "hash" + self.align_corners = align_corners + + # allocate parameters + offsets = [] + offset = 0 + self.max_params = 2 ** log2_hashmap_size + for i in range(num_levels): + resolution = int(np.ceil(base_resolution * per_level_scale ** i)) + params_in_level = min(self.max_params, (resolution if align_corners else resolution + 1) ** input_dim) # limit max number + params_in_level = int(np.ceil(params_in_level / 8) * 8) # make divisible + offsets.append(offset) + offset += params_in_level + # print(resolution, params_in_level) + offsets.append(offset) + offsets = torch.from_numpy(np.array(offsets, dtype=np.int32)) + self.register_buffer('offsets', offsets) + + self.n_params = offsets[-1] * level_dim + + # parameters + self.embeddings = nn.Parameter(torch.empty(offset, level_dim)) + + self.reset_parameters() + + def reset_parameters(self): + std = 1e-4 + self.embeddings.data.uniform_(-std, std) + + def __repr__(self): + return f"GridEncoder: input_dim={self.input_dim} num_levels={self.num_levels} level_dim={self.level_dim} resolution={self.base_resolution} -> {int(round(self.base_resolution * self.per_level_scale ** (self.num_levels - 1)))} per_level_scale={self.per_level_scale:.4f} params={tuple(self.embeddings.shape)} gridtype={self.gridtype} align_corners={self.align_corners}" + + def forward(self, inputs, bound=1): + # inputs: [..., input_dim], normalized real world positions in [-bound, bound] + # return: [..., num_levels * level_dim] + + inputs = (inputs + bound) / (2 * bound) # map to [0, 1] + + #print('inputs', inputs.shape, inputs.dtype, inputs.min().item(), inputs.max().item()) + + prefix_shape = list(inputs.shape[:-1]) + inputs = inputs.view(-1, self.input_dim) + + outputs = grid_encode(inputs, self.embeddings, self.offsets, self.per_level_scale, self.base_resolution, inputs.requires_grad, self.gridtype_id, self.align_corners) + outputs = outputs.view(prefix_shape + [self.output_dim]) + + #print('outputs', outputs.shape, outputs.dtype, outputs.min().item(), outputs.max().item()) + + return outputs \ No newline at end of file diff --git a/gridencoder/setup.py b/gridencoder/setup.py new file mode 100644 index 0000000..bda10a1 --- /dev/null +++ b/gridencoder/setup.py @@ -0,0 +1,50 @@ +import os +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +_src_path = os.path.dirname(os.path.abspath(__file__)) + +nvcc_flags = [ + '-O3', '-std=c++14', + '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', +] + +if os.name == "posix": + c_flags = ['-O3', '-std=c++14'] +elif os.name == "nt": + c_flags = ['/O2', '/std:c++17'] + + # find cl.exe + def find_cl_path(): + import glob + for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: + paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) + if paths: + return paths[0] + + # If cl.exe is not on path, try to find it. 
+ if os.system("where cl.exe >nul 2>nul") != 0: + cl_path = find_cl_path() + if cl_path is None: + raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") + os.environ["PATH"] += ";" + cl_path + +setup( + name='gridencoder', # package name, import this to use python API + ext_modules=[ + CUDAExtension( + name='_gridencoder', # extension name, import this to use CUDA API + sources=[os.path.join(_src_path, 'src', f) for f in [ + 'gridencoder.cu', + 'bindings.cpp', + ]], + extra_compile_args={ + 'cxx': c_flags, + 'nvcc': nvcc_flags, + } + ), + ], + cmdclass={ + 'build_ext': BuildExtension, + } +) \ No newline at end of file diff --git a/gridencoder/src/bindings.cpp b/gridencoder/src/bindings.cpp new file mode 100644 index 0000000..45f29b7 --- /dev/null +++ b/gridencoder/src/bindings.cpp @@ -0,0 +1,8 @@ +#include + +#include "gridencoder.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("grid_encode_forward", &grid_encode_forward, "grid_encode_forward (CUDA)"); + m.def("grid_encode_backward", &grid_encode_backward, "grid_encode_backward (CUDA)"); +} \ No newline at end of file diff --git a/gridencoder/src/gridencoder.cu b/gridencoder/src/gridencoder.cu new file mode 100644 index 0000000..34c1aba --- /dev/null +++ b/gridencoder/src/gridencoder.cu @@ -0,0 +1,479 @@ +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + + +#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor") +#define CHECK_IS_INT(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, #x " must be an int tensor") +#define CHECK_IS_FLOATING(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Float || x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::Double, #x " must be a floating tensor") + + +// just for compatability of half precision in AT_DISPATCH_FLOATING_TYPES_AND_HALF... +static inline __device__ at::Half atomicAdd(at::Half *address, at::Half val) { + // requires CUDA >= 10 and ARCH >= 70 + // this is very slow compared to float or __half2, and never used. + //return atomicAdd(reinterpret_cast<__half*>(address), val); +} + + +template +static inline __host__ __device__ T div_round_up(T val, T divisor) { + return (val + divisor - 1) / divisor; +} + + +template +__device__ uint32_t fast_hash(const uint32_t pos_grid[D]) { + static_assert(D <= 7, "fast_hash can only hash up to 7 dimensions."); + + // While 1 is technically not a good prime for hashing (or a prime at all), it helps memory coherence + // and is sufficient for our use case of obtaining a uniformly colliding index from high-dimensional + // coordinates. + constexpr uint32_t primes[7] = { 1, 2654435761, 805459861, 3674653429, 2097192037, 1434869437, 2165219737 }; + + uint32_t result = 0; + #pragma unroll + for (uint32_t i = 0; i < D; ++i) { + result ^= pos_grid[i] * primes[i]; + } + + return result; +} + + +template +__device__ uint32_t get_grid_index(const uint32_t gridtype, const bool align_corners, const uint32_t ch, const uint32_t hashmap_size, const uint32_t resolution, const uint32_t pos_grid[D]) { + uint32_t stride = 1; + uint32_t index = 0; + + #pragma unroll + for (uint32_t d = 0; d < D && stride <= hashmap_size; d++) { + index += pos_grid[d] * stride; + stride *= align_corners ? resolution: (resolution + 1); + } + + // NOTE: for NeRF, the hash is in fact not necessary. Check https://github.com/NVlabs/instant-ngp/issues/97. 
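+    // (levels whose dense grid still fits into hashmap_size keep the dense index computed above;
+    //  hashing only applies for the 'hash' gridtype once the dense stride exceeds hashmap_size)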
+ // gridtype: 0 == hash, 1 == tiled + if (gridtype == 0 && stride > hashmap_size) { + index = fast_hash(pos_grid); + } + + return (index % hashmap_size) * C + ch; +} + + +template +__global__ void kernel_grid( + const float * __restrict__ inputs, + const scalar_t * __restrict__ grid, + const int * __restrict__ offsets, + scalar_t * __restrict__ outputs, + const uint32_t B, const uint32_t L, const float S, const uint32_t H, + scalar_t * __restrict__ dy_dx, + const uint32_t gridtype, + const bool align_corners +) { + const uint32_t b = blockIdx.x * blockDim.x + threadIdx.x; + + if (b >= B) return; + + const uint32_t level = blockIdx.y; + + // locate + grid += (uint32_t)offsets[level] * C; + inputs += b * D; + outputs += level * B * C + b * C; + + // check input range (should be in [0, 1]) + bool flag_oob = false; + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + if (inputs[d] < 0 || inputs[d] > 1) { + flag_oob = true; + } + } + // if input out of bound, just set output to 0 + if (flag_oob) { + #pragma unroll + for (uint32_t ch = 0; ch < C; ch++) { + outputs[ch] = 0; + } + if (dy_dx) { + dy_dx += b * D * L * C + level * D * C; // B L D C + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + #pragma unroll + for (uint32_t ch = 0; ch < C; ch++) { + dy_dx[d * C + ch] = 0; + } + } + } + return; + } + + const uint32_t hashmap_size = offsets[level + 1] - offsets[level]; + const float scale = exp2f(level * S) * H - 1.0f; + const uint32_t resolution = (uint32_t)ceil(scale) + 1; + + // calculate coordinate + float pos[D]; + uint32_t pos_grid[D]; + + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + pos[d] = inputs[d] * scale + (align_corners ? 0.0f : 0.5f); + pos_grid[d] = floorf(pos[d]); + pos[d] -= (float)pos_grid[d]; + } + + //printf("[b=%d, l=%d] pos=(%f, %f)+(%d, %d)\n", b, level, pos[0], pos[1], pos_grid[0], pos_grid[1]); + + // interpolate + scalar_t results[C] = {0}; // temp results in register + + #pragma unroll + for (uint32_t idx = 0; idx < (1 << D); idx++) { + float w = 1; + uint32_t pos_grid_local[D]; + + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + if ((idx & (1 << d)) == 0) { + w *= 1 - pos[d]; + pos_grid_local[d] = pos_grid[d]; + } else { + w *= pos[d]; + pos_grid_local[d] = pos_grid[d] + 1; + } + } + + uint32_t index = get_grid_index(gridtype, align_corners, 0, hashmap_size, resolution, pos_grid_local); + + // writing to register (fast) + #pragma unroll + for (uint32_t ch = 0; ch < C; ch++) { + results[ch] += w * grid[index + ch]; + } + + //printf("[b=%d, l=%d] int %d, idx %d, w %f, val %f\n", b, level, idx, index, w, grid[index]); + } + + // writing to global memory (slow) + #pragma unroll + for (uint32_t ch = 0; ch < C; ch++) { + outputs[ch] = results[ch]; + } + + // prepare dy_dx + // differentiable (soft) indexing: https://discuss.pytorch.org/t/differentiable-indexing/17647/9 + if (dy_dx) { + + dy_dx += b * D * L * C + level * D * C; // B L D C + + #pragma unroll + for (uint32_t gd = 0; gd < D; gd++) { + + scalar_t results_grad[C] = {0}; + + #pragma unroll + for (uint32_t idx = 0; idx < (1 << (D - 1)); idx++) { + float w = scale; + uint32_t pos_grid_local[D]; + + #pragma unroll + for (uint32_t nd = 0; nd < D - 1; nd++) { + const uint32_t d = (nd >= gd) ? 
(nd + 1) : nd; + + if ((idx & (1 << nd)) == 0) { + w *= 1 - pos[d]; + pos_grid_local[d] = pos_grid[d]; + } else { + w *= pos[d]; + pos_grid_local[d] = pos_grid[d] + 1; + } + } + + pos_grid_local[gd] = pos_grid[gd]; + uint32_t index_left = get_grid_index(gridtype, align_corners, 0, hashmap_size, resolution, pos_grid_local); + pos_grid_local[gd] = pos_grid[gd] + 1; + uint32_t index_right = get_grid_index(gridtype, align_corners, 0, hashmap_size, resolution, pos_grid_local); + + #pragma unroll + for (uint32_t ch = 0; ch < C; ch++) { + results_grad[ch] += w * (grid[index_right + ch] - grid[index_left + ch]); + } + } + + #pragma unroll + for (uint32_t ch = 0; ch < C; ch++) { + dy_dx[gd * C + ch] = results_grad[ch]; + } + } + } +} + + +template +__global__ void kernel_grid_backward( + const scalar_t * __restrict__ grad, + const float * __restrict__ inputs, + const scalar_t * __restrict__ grid, + const int * __restrict__ offsets, + scalar_t * __restrict__ grad_grid, + const uint32_t B, const uint32_t L, const float S, const uint32_t H, + const uint32_t gridtype, + const bool align_corners +) { + const uint32_t b = (blockIdx.x * blockDim.x + threadIdx.x) * N_C / C; + if (b >= B) return; + + const uint32_t level = blockIdx.y; + const uint32_t ch = (blockIdx.x * blockDim.x + threadIdx.x) * N_C - b * C; + + // locate + grad_grid += offsets[level] * C; + inputs += b * D; + grad += level * B * C + b * C + ch; // L, B, C + + const uint32_t hashmap_size = offsets[level + 1] - offsets[level]; + const float scale = exp2f(level * S) * H - 1.0f; + const uint32_t resolution = (uint32_t)ceil(scale) + 1; + + // check input range (should be in [0, 1]) + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + if (inputs[d] < 0 || inputs[d] > 1) { + return; // grad is init as 0, so we simply return. + } + } + + // calculate coordinate + float pos[D]; + uint32_t pos_grid[D]; + + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + pos[d] = inputs[d] * scale + (align_corners ? 
0.0f : 0.5f); + pos_grid[d] = floorf(pos[d]); + pos[d] -= (float)pos_grid[d]; + } + + scalar_t grad_cur[N_C] = {0}; // fetch to register + #pragma unroll + for (uint32_t c = 0; c < N_C; c++) { + grad_cur[c] = grad[c]; + } + + // interpolate + #pragma unroll + for (uint32_t idx = 0; idx < (1 << D); idx++) { + float w = 1; + uint32_t pos_grid_local[D]; + + #pragma unroll + for (uint32_t d = 0; d < D; d++) { + if ((idx & (1 << d)) == 0) { + w *= 1 - pos[d]; + pos_grid_local[d] = pos_grid[d]; + } else { + w *= pos[d]; + pos_grid_local[d] = pos_grid[d] + 1; + } + } + + uint32_t index = get_grid_index(gridtype, align_corners, ch, hashmap_size, resolution, pos_grid_local); + + // atomicAdd for __half is slow (especially for large values), so we use __half2 if N_C % 2 == 0 + // TODO: use float which is better than __half, if N_C % 2 != 0 + if (std::is_same::value && N_C % 2 == 0) { + #pragma unroll + for (uint32_t c = 0; c < N_C; c += 2) { + // process two __half at once (by interpreting as a __half2) + __half2 v = {(__half)(w * grad_cur[c]), (__half)(w * grad_cur[c + 1])}; + atomicAdd((__half2*)&grad_grid[index + c], v); + } + // float, or __half when N_C % 2 != 0 (which means C == 1) + } else { + #pragma unroll + for (uint32_t c = 0; c < N_C; c++) { + atomicAdd(&grad_grid[index + c], w * grad_cur[c]); + } + } + } +} + + +template +__global__ void kernel_input_backward( + const scalar_t * __restrict__ grad, + const scalar_t * __restrict__ dy_dx, + scalar_t * __restrict__ grad_inputs, + uint32_t B, uint32_t L +) { + const uint32_t t = threadIdx.x + blockIdx.x * blockDim.x; + if (t >= B * D) return; + + const uint32_t b = t / D; + const uint32_t d = t - b * D; + + dy_dx += b * L * D * C; + + scalar_t result = 0; + + # pragma unroll + for (int l = 0; l < L; l++) { + # pragma unroll + for (int ch = 0; ch < C; ch++) { + result += grad[l * B * C + b * C + ch] * dy_dx[l * D * C + d * C + ch]; + } + } + + grad_inputs[t] = result; +} + + +template +void kernel_grid_wrapper(const float *inputs, const scalar_t *embeddings, const int *offsets, scalar_t *outputs, const uint32_t B, const uint32_t C, const uint32_t L, const float S, const uint32_t H, scalar_t *dy_dx, const uint32_t gridtype, const bool align_corners) { + static constexpr uint32_t N_THREAD = 512; + const dim3 blocks_hashgrid = { div_round_up(B, N_THREAD), L, 1 }; + switch (C) { + case 1: kernel_grid<<>>(inputs, embeddings, offsets, outputs, B, L, S, H, dy_dx, gridtype, align_corners); break; + case 2: kernel_grid<<>>(inputs, embeddings, offsets, outputs, B, L, S, H, dy_dx, gridtype, align_corners); break; + case 4: kernel_grid<<>>(inputs, embeddings, offsets, outputs, B, L, S, H, dy_dx, gridtype, align_corners); break; + case 8: kernel_grid<<>>(inputs, embeddings, offsets, outputs, B, L, S, H, dy_dx, gridtype, align_corners); break; + default: throw std::runtime_error{"GridEncoding: C must be 1, 2, 4, or 8."}; + } +} + +// inputs: [B, D], float, in [0, 1] +// embeddings: [sO, C], float +// offsets: [L + 1], uint32_t +// outputs: [L, B, C], float (L first, so only one level of hashmap needs to fit into cache at a time.) 
+// H: base resolution +// dy_dx: [B, L * D * C] +template +void grid_encode_forward_cuda(const float *inputs, const scalar_t *embeddings, const int *offsets, scalar_t *outputs, const uint32_t B, const uint32_t D, const uint32_t C, const uint32_t L, const float S, const uint32_t H, scalar_t *dy_dx, const uint32_t gridtype, const bool align_corners) { + switch (D) { + case 1: kernel_grid_wrapper(inputs, embeddings, offsets, outputs, B, C, L, S, H, dy_dx, gridtype, align_corners); break; + case 2: kernel_grid_wrapper(inputs, embeddings, offsets, outputs, B, C, L, S, H, dy_dx, gridtype, align_corners); break; + case 3: kernel_grid_wrapper(inputs, embeddings, offsets, outputs, B, C, L, S, H, dy_dx, gridtype, align_corners); break; + case 4: kernel_grid_wrapper(inputs, embeddings, offsets, outputs, B, C, L, S, H, dy_dx, gridtype, align_corners); break; + case 5: kernel_grid_wrapper(inputs, embeddings, offsets, outputs, B, C, L, S, H, dy_dx, gridtype, align_corners); break; + default: throw std::runtime_error{"GridEncoding: D must be 1, 2, 3, 4, or 5"}; + } + +} + +template +void kernel_grid_backward_wrapper(const scalar_t *grad, const float *inputs, const scalar_t *embeddings, const int *offsets, scalar_t *grad_embeddings, const uint32_t B, const uint32_t C, const uint32_t L, const float S, const uint32_t H, scalar_t *dy_dx, scalar_t *grad_inputs, const uint32_t gridtype, const bool align_corners) { + static constexpr uint32_t N_THREAD = 256; + const uint32_t N_C = std::min(2u, C); // n_features_per_thread + const dim3 blocks_hashgrid = { div_round_up(B * C / N_C, N_THREAD), L, 1 }; + switch (C) { + case 1: + kernel_grid_backward<<>>(grad, inputs, embeddings, offsets, grad_embeddings, B, L, S, H, gridtype, align_corners); + if (dy_dx) kernel_input_backward<<>>(grad, dy_dx, grad_inputs, B, L); + break; + case 2: + kernel_grid_backward<<>>(grad, inputs, embeddings, offsets, grad_embeddings, B, L, S, H, gridtype, align_corners); + if (dy_dx) kernel_input_backward<<>>(grad, dy_dx, grad_inputs, B, L); + break; + case 4: + kernel_grid_backward<<>>(grad, inputs, embeddings, offsets, grad_embeddings, B, L, S, H, gridtype, align_corners); + if (dy_dx) kernel_input_backward<<>>(grad, dy_dx, grad_inputs, B, L); + break; + case 8: + kernel_grid_backward<<>>(grad, inputs, embeddings, offsets, grad_embeddings, B, L, S, H, gridtype, align_corners); + if (dy_dx) kernel_input_backward<<>>(grad, dy_dx, grad_inputs, B, L); + break; + default: throw std::runtime_error{"GridEncoding: C must be 1, 2, 4, or 8."}; + } +} + + +// grad: [L, B, C], float +// inputs: [B, D], float, in [0, 1] +// embeddings: [sO, C], float +// offsets: [L + 1], uint32_t +// grad_embeddings: [sO, C] +// H: base resolution +template +void grid_encode_backward_cuda(const scalar_t *grad, const float *inputs, const scalar_t *embeddings, const int *offsets, scalar_t *grad_embeddings, const uint32_t B, const uint32_t D, const uint32_t C, const uint32_t L, const float S, const uint32_t H, scalar_t *dy_dx, scalar_t *grad_inputs, const uint32_t gridtype, const bool align_corners) { + switch (D) { + case 1: kernel_grid_backward_wrapper(grad, inputs, embeddings, offsets, grad_embeddings, B, C, L, S, H, dy_dx, grad_inputs, gridtype, align_corners); break; + case 2: kernel_grid_backward_wrapper(grad, inputs, embeddings, offsets, grad_embeddings, B, C, L, S, H, dy_dx, grad_inputs, gridtype, align_corners); break; + case 3: kernel_grid_backward_wrapper(grad, inputs, embeddings, offsets, grad_embeddings, B, C, L, S, H, dy_dx, grad_inputs, gridtype, 
align_corners); break; + case 4: kernel_grid_backward_wrapper(grad, inputs, embeddings, offsets, grad_embeddings, B, C, L, S, H, dy_dx, grad_inputs, gridtype, align_corners); break; + case 5: kernel_grid_backward_wrapper(grad, inputs, embeddings, offsets, grad_embeddings, B, C, L, S, H, dy_dx, grad_inputs, gridtype, align_corners); break; + default: throw std::runtime_error{"GridEncoding: D must be 1, 2, 3, 4, or 5"}; + } +} + + + +void grid_encode_forward(const at::Tensor inputs, const at::Tensor embeddings, const at::Tensor offsets, at::Tensor outputs, const uint32_t B, const uint32_t D, const uint32_t C, const uint32_t L, const float S, const uint32_t H, at::optional dy_dx, const uint32_t gridtype, const bool align_corners) { + CHECK_CUDA(inputs); + CHECK_CUDA(embeddings); + CHECK_CUDA(offsets); + CHECK_CUDA(outputs); + // CHECK_CUDA(dy_dx); + + CHECK_CONTIGUOUS(inputs); + CHECK_CONTIGUOUS(embeddings); + CHECK_CONTIGUOUS(offsets); + CHECK_CONTIGUOUS(outputs); + // CHECK_CONTIGUOUS(dy_dx); + + CHECK_IS_FLOATING(inputs); + CHECK_IS_FLOATING(embeddings); + CHECK_IS_INT(offsets); + CHECK_IS_FLOATING(outputs); + // CHECK_IS_FLOATING(dy_dx); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + embeddings.scalar_type(), "grid_encode_forward", ([&] { + grid_encode_forward_cuda(inputs.data_ptr(), embeddings.data_ptr(), offsets.data_ptr(), outputs.data_ptr(), B, D, C, L, S, H, dy_dx.has_value() ? dy_dx.value().data_ptr() : nullptr, gridtype, align_corners); + })); +} + +void grid_encode_backward(const at::Tensor grad, const at::Tensor inputs, const at::Tensor embeddings, const at::Tensor offsets, at::Tensor grad_embeddings, const uint32_t B, const uint32_t D, const uint32_t C, const uint32_t L, const float S, const uint32_t H, const at::optional dy_dx, at::optional grad_inputs, const uint32_t gridtype, const bool align_corners) { + CHECK_CUDA(grad); + CHECK_CUDA(inputs); + CHECK_CUDA(embeddings); + CHECK_CUDA(offsets); + CHECK_CUDA(grad_embeddings); + // CHECK_CUDA(dy_dx); + // CHECK_CUDA(grad_inputs); + + CHECK_CONTIGUOUS(grad); + CHECK_CONTIGUOUS(inputs); + CHECK_CONTIGUOUS(embeddings); + CHECK_CONTIGUOUS(offsets); + CHECK_CONTIGUOUS(grad_embeddings); + // CHECK_CONTIGUOUS(dy_dx); + // CHECK_CONTIGUOUS(grad_inputs); + + CHECK_IS_FLOATING(grad); + CHECK_IS_FLOATING(inputs); + CHECK_IS_FLOATING(embeddings); + CHECK_IS_INT(offsets); + CHECK_IS_FLOATING(grad_embeddings); + // CHECK_IS_FLOATING(dy_dx); + // CHECK_IS_FLOATING(grad_inputs); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "grid_encode_backward", ([&] { + grid_encode_backward_cuda(grad.data_ptr(), inputs.data_ptr(), embeddings.data_ptr(), offsets.data_ptr(), grad_embeddings.data_ptr(), B, D, C, L, S, H, dy_dx.has_value() ? dy_dx.value().data_ptr() : nullptr, grad_inputs.has_value() ? 
grad_inputs.value().data_ptr() : nullptr, gridtype, align_corners); + })); + +} diff --git a/gridencoder/src/gridencoder.h b/gridencoder/src/gridencoder.h new file mode 100644 index 0000000..89b6249 --- /dev/null +++ b/gridencoder/src/gridencoder.h @@ -0,0 +1,15 @@ +#ifndef _HASH_ENCODE_H +#define _HASH_ENCODE_H + +#include +#include + +// inputs: [B, D], float, in [0, 1] +// embeddings: [sO, C], float +// offsets: [L + 1], uint32_t +// outputs: [B, L * C], float +// H: base resolution +void grid_encode_forward(const at::Tensor inputs, const at::Tensor embeddings, const at::Tensor offsets, at::Tensor outputs, const uint32_t B, const uint32_t D, const uint32_t C, const uint32_t L, const float S, const uint32_t H, at::optional dy_dx, const uint32_t gridtype, const bool align_corners); +void grid_encode_backward(const at::Tensor grad, const at::Tensor inputs, const at::Tensor embeddings, const at::Tensor offsets, at::Tensor grad_embeddings, const uint32_t B, const uint32_t D, const uint32_t C, const uint32_t L, const float S, const uint32_t H, const at::optional dy_dx, at::optional grad_inputs, const uint32_t gridtype, const bool align_corners); + +#endif \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..d6235ad --- /dev/null +++ b/main.py @@ -0,0 +1,260 @@ +import torch +import argparse + +from nerf_triplane.provider import NeRFDataset +from nerf_triplane.gui import NeRFGUI +from nerf_triplane.utils import * +from nerf_triplane.network import NeRFNetwork + +# torch.autograd.set_detect_anomaly(True) +# Close tf32 features. Fix low numerical accuracy on rtx30xx gpu. +try: + torch.backends.cuda.matmul.allow_tf32 = False + torch.backends.cudnn.allow_tf32 = False +except AttributeError as e: + print('Info. This pytorch version is not support with tf32.') + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('path', type=str) + parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --exp_eye") + parser.add_argument('--test', action='store_true', help="test mode (load model and test dataset)") + parser.add_argument('--test_train', action='store_true', help="test mode (load model and train dataset)") + parser.add_argument('--data_range', type=int, nargs='*', default=[0, -1], help="data range to use") + parser.add_argument('--workspace', type=str, default='workspace') + parser.add_argument('--seed', type=int, default=0) + + ### training options + parser.add_argument('--iters', type=int, default=200000, help="training iters") + parser.add_argument('--lr', type=float, default=1e-2, help="initial learning rate") + parser.add_argument('--lr_net', type=float, default=1e-3, help="initial learning rate") + parser.add_argument('--ckpt', type=str, default='latest') + parser.add_argument('--num_rays', type=int, default=4096 * 16, help="num rays sampled per image for each training step") + parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch") + parser.add_argument('--max_steps', type=int, default=16, help="max num steps sampled per ray (only valid when using --cuda_ray)") + parser.add_argument('--num_steps', type=int, default=16, help="num steps sampled per ray (only valid when NOT using --cuda_ray)") + parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when NOT using --cuda_ray)") + parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only 
valid when using --cuda_ray)") + parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when NOT using --cuda_ray)") + + ### loss set + parser.add_argument('--warmup_step', type=int, default=10000, help="warm up steps") + parser.add_argument('--amb_aud_loss', type=int, default=1, help="use ambient aud loss") + parser.add_argument('--amb_eye_loss', type=int, default=1, help="use ambient eye loss") + parser.add_argument('--unc_loss', type=int, default=1, help="use uncertainty loss") + parser.add_argument('--lambda_amb', type=float, default=1e-4, help="lambda for ambient loss") + + ### network backbone options + parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training") + + parser.add_argument('--bg_img', type=str, default='', help="background image") + parser.add_argument('--fbg', action='store_true', help="frame-wise bg") + parser.add_argument('--exp_eye', action='store_true', help="explicitly control the eyes") + parser.add_argument('--fix_eye', type=float, default=-1, help="fixed eye area, negative to disable, set to 0-0.3 for a reasonable eye") + parser.add_argument('--smooth_eye', action='store_true', help="smooth the eye area sequence") + + parser.add_argument('--torso_shrink', type=float, default=0.8, help="shrink bg coords to allow more flexibility in deform") + + ### dataset options + parser.add_argument('--color_space', type=str, default='srgb', help="Color space, supports (linear, srgb)") + parser.add_argument('--preload', type=int, default=0, help="0 means load data from disk on-the-fly, 1 means preload to CPU, 2 means GPU.") + # (the default value is for the fox dataset) + parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.") + parser.add_argument('--scale', type=float, default=4, help="scale camera location into box[-bound, bound]^3") + parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location") + parser.add_argument('--dt_gamma', type=float, default=1/256, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)") + parser.add_argument('--min_near', type=float, default=0.05, help="minimum near distance for camera") + parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied (sigma)") + parser.add_argument('--density_thresh_torso', type=float, default=0.01, help="threshold for density grid to be occupied (alpha)") + parser.add_argument('--patch_size', type=int, default=1, help="[experimental] render patches in training, so as to apply LPIPS loss. 
1 means disabled, use [64, 32, 16] to enable") + + parser.add_argument('--init_lips', action='store_true', help="init lips region") + parser.add_argument('--finetune_lips', action='store_true', help="use LPIPS and landmarks to fine tune lips region") + parser.add_argument('--smooth_lips', action='store_true', help="smooth the enc_a in a exponential decay way...") + + parser.add_argument('--torso', action='store_true', help="fix head and train torso") + parser.add_argument('--head_ckpt', type=str, default='', help="head model") + + ### GUI options + parser.add_argument('--gui', action='store_true', help="start a GUI") + parser.add_argument('--W', type=int, default=450, help="GUI width") + parser.add_argument('--H', type=int, default=450, help="GUI height") + parser.add_argument('--radius', type=float, default=3.35, help="default GUI camera radius from center") + parser.add_argument('--fovy', type=float, default=21.24, help="default GUI camera fovy") + parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel") + + ### else + parser.add_argument('--att', type=int, default=2, help="audio attention mode (0 = turn off, 1 = left-direction, 2 = bi-direction)") + parser.add_argument('--aud', type=str, default='', help="audio source (empty will load the default, else should be a path to a npy file)") + parser.add_argument('--emb', action='store_true', help="use audio class + embedding instead of logits") + + parser.add_argument('--ind_dim', type=int, default=4, help="individual code dim, 0 to turn off") + parser.add_argument('--ind_num', type=int, default=10000, help="number of individual codes, should be larger than training dataset size") + + parser.add_argument('--ind_dim_torso', type=int, default=8, help="individual code dim, 0 to turn off") + + parser.add_argument('--amb_dim', type=int, default=2, help="ambient dimension") + parser.add_argument('--part', action='store_true', help="use partial training data (1/10)") + parser.add_argument('--part2', action='store_true', help="use partial training data (first 15s)") + + parser.add_argument('--train_camera', action='store_true', help="optimize camera pose") + parser.add_argument('--smooth_path', action='store_true', help="brute-force smooth camera pose trajectory with a window size") + parser.add_argument('--smooth_path_window', type=int, default=7, help="smoothing window size") + + # asr + parser.add_argument('--asr', action='store_true', help="load asr for real-time app") + parser.add_argument('--asr_wav', type=str, default='', help="load the wav and use as input") + parser.add_argument('--asr_play', action='store_true', help="play out the audio") + + parser.add_argument('--asr_model', type=str, default='deepspeech') + # parser.add_argument('--asr_model', type=str, default='cpierse/wav2vec2-large-xlsr-53-esperanto') + # parser.add_argument('--asr_model', type=str, default='facebook/wav2vec2-large-960h-lv60-self') + + parser.add_argument('--asr_save_feats', action='store_true') + # audio FPS + parser.add_argument('--fps', type=int, default=50) + # sliding window left-middle-right length (unit: 20ms) + parser.add_argument('-l', type=int, default=10) + parser.add_argument('-m', type=int, default=50) + parser.add_argument('-r', type=int, default=10) + + opt = parser.parse_args() + + if opt.O: + opt.fp16 = True + opt.exp_eye = True + + if opt.test and False: + opt.smooth_path = True + opt.smooth_eye = True + opt.smooth_lips = True + + opt.cuda_ray = True + # assert opt.cuda_ray, "Only support CUDA ray mode." 
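+    # Typical invocations, for reference only (the dataset path, workspace names and the head
+    # checkpoint path below are illustrative, not fixed by this script):
+    #   python main.py data/obama --workspace trial_obama -O --iters 200000
+    #   python main.py data/obama --workspace trial_obama_torso -O --torso --head_ckpt <path/to/head_checkpoint.pth>
+    #   python main.py data/obama --workspace trial_obama -O --test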
+
+    if opt.patch_size > 1:
+        # assert opt.patch_size > 16, "patch_size should > 16 to run LPIPS loss."
+        assert opt.num_rays % (opt.patch_size ** 2) == 0, "num_rays should be divisible by patch_size ** 2."
+
+    # if opt.finetune_lips:
+    #     # do not update density grid in finetune stage
+    #     opt.update_extra_interval = 1e9
+
+    print(opt)
+
+    seed_everything(opt.seed)
+
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    model = NeRFNetwork(opt)
+
+    # manually load state dict for head
+    if opt.torso and opt.head_ckpt != '':
+
+        model_dict = torch.load(opt.head_ckpt, map_location='cpu')['model']
+
+        missing_keys, unexpected_keys = model.load_state_dict(model_dict, strict=False)
+
+        if len(missing_keys) > 0:
+            print(f"[WARN] missing keys: {missing_keys}")
+        if len(unexpected_keys) > 0:
+            print(f"[WARN] unexpected keys: {unexpected_keys}")
+
+        # freeze these keys
+        for k, v in model.named_parameters():
+            if k in model_dict:
+                # print(f'[INFO] freeze {k}, {v.shape}')
+                v.requires_grad = False
+
+
+    # print(model)
+
+    criterion = torch.nn.MSELoss(reduction='none')
+
+    if opt.test:
+
+        if opt.gui:
+            metrics = [] # use no metric in GUI for faster initialization...
+        else:
+            # metrics = [PSNRMeter(), LPIPSMeter(device=device)]
+            metrics = [PSNRMeter(), LPIPSMeter(device=device), LMDMeter(backend='fan')]
+
+        trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16, metrics=metrics, use_checkpoint=opt.ckpt)
+
+        if opt.test_train:
+            test_set = NeRFDataset(opt, device=device, type='train')
+            # a manual fix to test on the training dataset
+            test_set.training = False
+            test_set.num_rays = -1
+            test_loader = test_set.dataloader()
+        else:
+            test_loader = NeRFDataset(opt, device=device, type='test').dataloader()
+
+
+        # temp fix: for update_extra_states
+        model.aud_features = test_loader._data.auds
+        model.eye_areas = test_loader._data.eye_area
+
+        if opt.gui:
+            # we still need test_loader to provide audio features for testing.
+            with NeRFGUI(opt, trainer, test_loader) as gui:
+                gui.render()
+
+        else:
+            ### test and save video (fast)
+            trainer.test(test_loader)
+
+            ### evaluate metrics (slow)
+            if test_loader.has_gt:
+                trainer.evaluate(test_loader)
+
+
+
+    else:
+
+        optimizer = lambda model: torch.optim.AdamW(model.get_params(opt.lr, opt.lr_net), betas=(0, 0.99), eps=1e-8)
+
+        train_loader = NeRFDataset(opt, device=device, type='train').dataloader()
+
+        assert len(train_loader) < opt.ind_num, f"[ERROR] dataset has too many frames ({len(train_loader)}); please increase --ind_num above this number!"
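+        # (each training frame is assigned its own learnable individual code, so --ind_num must
+        # exceed the number of training frames)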
+ + # temp fix: for update_extra_states + model.aud_features = train_loader._data.auds + model.eye_area = train_loader._data.eye_area + model.poses = train_loader._data.poses + + # decay to 0.1 * init_lr at last iter step + if opt.finetune_lips: + scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.05 ** (iter / opt.iters)) + else: + scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.5 ** (iter / opt.iters)) + + metrics = [PSNRMeter(), LPIPSMeter(device=device)] + + eval_interval = max(1, int(5000 / len(train_loader))) + trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, optimizer=optimizer, criterion=criterion, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, scheduler_update_every_step=True, metrics=metrics, use_checkpoint=opt.ckpt, eval_interval=eval_interval) + with open(os.path.join(opt.workspace, 'opt.txt'), 'a') as f: + f.write(str(opt)) + if opt.gui: + with NeRFGUI(opt, trainer, train_loader) as gui: + gui.render() + + else: + valid_loader = NeRFDataset(opt, device=device, type='val', downscale=1).dataloader() + + max_epochs = np.ceil(opt.iters / len(train_loader)).astype(np.int32) + print(f'[INFO] max_epoch = {max_epochs}') + trainer.train(train_loader, valid_loader, max_epochs) + + # free some mem + del train_loader, valid_loader + torch.cuda.empty_cache() + + # also test + test_loader = NeRFDataset(opt, device=device, type='test').dataloader() + + if test_loader.has_gt: + trainer.evaluate(test_loader) # blender has gt, so evaluate it. + + trainer.test(test_loader) \ No newline at end of file diff --git a/nerf_triplane/asr.py b/nerf_triplane/asr.py new file mode 100644 index 0000000..dc8db9c --- /dev/null +++ b/nerf_triplane/asr.py @@ -0,0 +1,419 @@ +import time +import numpy as np +import torch +import torch.nn.functional as F +from transformers import AutoModelForCTC, AutoProcessor + +import pyaudio +import soundfile as sf +import resampy + +from queue import Queue +from threading import Thread, Event + + +def _read_frame(stream, exit_event, queue, chunk): + + while True: + if exit_event.is_set(): + print(f'[INFO] read frame thread ends') + break + frame = stream.read(chunk, exception_on_overflow=False) + frame = np.frombuffer(frame, dtype=np.int16).astype(np.float32) / 32767 # [chunk] + queue.put(frame) + +def _play_frame(stream, exit_event, queue, chunk): + + while True: + if exit_event.is_set(): + print(f'[INFO] play frame thread ends') + break + frame = queue.get() + frame = (frame * 32767).astype(np.int16).tobytes() + stream.write(frame, chunk) + +class ASR: + def __init__(self, opt): + + self.opt = opt + + self.play = opt.asr_play + + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + self.fps = opt.fps # 20 ms per frame + self.sample_rate = 16000 + self.chunk = self.sample_rate // self.fps # 320 samples per chunk (20ms * 16000 / 1000) + self.mode = 'live' if opt.asr_wav == '' else 'file' + + if 'esperanto' in self.opt.asr_model: + self.audio_dim = 44 + elif 'deepspeech' in self.opt.asr_model: + self.audio_dim = 29 + else: + self.audio_dim = 32 + + # prepare context cache + # each segment is (stride_left + ctx + stride_right) * 20ms, latency should be (ctx + stride_right) * 20ms + self.context_size = opt.m + self.stride_left_size = opt.l + self.stride_right_size = opt.r + self.text = '[START]\n' + self.terminated = False + self.frames = [] + + # pad left frames + if self.stride_left_size > 0: + self.frames.extend([np.zeros(self.chunk, dtype=np.float32)] * 
self.stride_left_size) + + + self.exit_event = Event() + self.audio_instance = pyaudio.PyAudio() + + # create input stream + if self.mode == 'file': + self.file_stream = self.create_file_stream() + else: + # start a background process to read frames + self.input_stream = self.audio_instance.open(format=pyaudio.paInt16, channels=1, rate=self.sample_rate, input=True, output=False, frames_per_buffer=self.chunk) + self.queue = Queue() + self.process_read_frame = Thread(target=_read_frame, args=(self.input_stream, self.exit_event, self.queue, self.chunk)) + + # play out the audio too...? + if self.play: + self.output_stream = self.audio_instance.open(format=pyaudio.paInt16, channels=1, rate=self.sample_rate, input=False, output=True, frames_per_buffer=self.chunk) + self.output_queue = Queue() + self.process_play_frame = Thread(target=_play_frame, args=(self.output_stream, self.exit_event, self.output_queue, self.chunk)) + + # current location of audio + self.idx = 0 + + # create wav2vec model + print(f'[INFO] loading ASR model {self.opt.asr_model}...') + self.processor = AutoProcessor.from_pretrained(opt.asr_model) + self.model = AutoModelForCTC.from_pretrained(opt.asr_model).to(self.device) + + # prepare to save logits + if self.opt.asr_save_feats: + self.all_feats = [] + + # the extracted features + # use a loop queue to efficiently record endless features: [f--t---][-------][-------] + self.feat_buffer_size = 4 + self.feat_buffer_idx = 0 + self.feat_queue = torch.zeros(self.feat_buffer_size * self.context_size, self.audio_dim, dtype=torch.float32, device=self.device) + + # TODO: hard coded 16 and 8 window size... + self.front = self.feat_buffer_size * self.context_size - 8 # fake padding + self.tail = 8 + # attention window... + self.att_feats = [torch.zeros(self.audio_dim, 16, dtype=torch.float32, device=self.device)] * 4 # 4 zero padding... + + # warm up steps needed: mid + right + window_size + attention_size + self.warm_up_steps = self.context_size + self.stride_right_size + 8 + 2 * 3 + + self.listening = False + self.playing = False + + def listen(self): + # start + if self.mode == 'live' and not self.listening: + print(f'[INFO] starting read frame thread...') + self.process_read_frame.start() + self.listening = True + + if self.play and not self.playing: + print(f'[INFO] starting play frame thread...') + self.process_play_frame.start() + self.playing = True + + def stop(self): + + self.exit_event.set() + + if self.play: + self.output_stream.stop_stream() + self.output_stream.close() + if self.playing: + self.process_play_frame.join() + self.playing = False + + if self.mode == 'live': + self.input_stream.stop_stream() + self.input_stream.close() + if self.listening: + self.process_read_frame.join() + self.listening = False + + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + + self.stop() + + if self.mode == 'live': + # live mode: also print the result text. + self.text += '\n[END]' + print(self.text) + + def get_next_feat(self): + # return a [1/8, 16] window, for the next input to nerf side. 
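+        # self.feat_queue is a ring buffer of per-frame audio logits (with the
+        # defaults in this file's __main__, -m 50 and feat_buffer_size 4, it holds
+        # 200 frames); [front, tail) marks a 16-frame window that wraps around the
+        # buffer and advances by a hop of two frames for each new window. Eight
+        # consecutive windows are stacked into a [8, audio_dim, 16] tensor for the
+        # audio attention net, then the oldest window is dropped so the deque
+        # length stays fixed.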
+ + while len(self.att_feats) < 8: + # [------f+++t-----] + if self.front < self.tail: + feat = self.feat_queue[self.front:self.tail] + # [++t-----------f+] + else: + feat = torch.cat([self.feat_queue[self.front:], self.feat_queue[:self.tail]], dim=0) + + self.front = (self.front + 2) % self.feat_queue.shape[0] + self.tail = (self.tail + 2) % self.feat_queue.shape[0] + + # print(self.front, self.tail, feat.shape) + + self.att_feats.append(feat.permute(1, 0)) + + att_feat = torch.stack(self.att_feats, dim=0) # [8, 44, 16] + + # discard old + self.att_feats = self.att_feats[1:] + + return att_feat + + def run_step(self): + + if self.terminated: + return + + # get a frame of audio + frame = self.get_audio_frame() + + # the last frame + if frame is None: + # terminate, but always run the network for the left frames + self.terminated = True + else: + self.frames.append(frame) + # put to output + if self.play: + self.output_queue.put(frame) + # context not enough, do not run network. + if len(self.frames) < self.stride_left_size + self.context_size + self.stride_right_size: + return + + inputs = np.concatenate(self.frames) # [N * chunk] + + # discard the old part to save memory + if not self.terminated: + self.frames = self.frames[-(self.stride_left_size + self.stride_right_size):] + + logits, labels, text = self.frame_to_text(inputs) + feats = logits # better lips-sync than labels + + # save feats + if self.opt.asr_save_feats: + self.all_feats.append(feats) + + # record the feats efficiently.. (no concat, constant memory) + start = self.feat_buffer_idx * self.context_size + end = start + feats.shape[0] + self.feat_queue[start:end] = feats + self.feat_buffer_idx = (self.feat_buffer_idx + 1) % self.feat_buffer_size + + # very naive, just concat the text output. + if text != '': + self.text = self.text + ' ' + text + + # will only run once at ternimation + if self.terminated: + self.text += '\n[END]' + print(self.text) + if self.opt.asr_save_feats: + print(f'[INFO] save all feats for training purpose... 
') + feats = torch.cat(self.all_feats, dim=0) # [N, C] + # print('[INFO] before unfold', feats.shape) + window_size = 16 + padding = window_size // 2 + feats = feats.view(-1, self.audio_dim).permute(1, 0).contiguous() # [C, M] + feats = feats.view(1, self.audio_dim, -1, 1) # [1, C, M, 1] + unfold_feats = F.unfold(feats, kernel_size=(window_size, 1), padding=(padding, 0), stride=(2, 1)) # [1, C * window_size, M / 2 + 1] + unfold_feats = unfold_feats.view(self.audio_dim, window_size, -1).permute(2, 1, 0).contiguous() # [C, window_size, M / 2 + 1] --> [M / 2 + 1, window_size, C] + # print('[INFO] after unfold', unfold_feats.shape) + # save to a npy file + if 'esperanto' in self.opt.asr_model: + output_path = self.opt.asr_wav.replace('.wav', '_eo.npy') + else: + output_path = self.opt.asr_wav.replace('.wav', '.npy') + np.save(output_path, unfold_feats.cpu().numpy()) + print(f"[INFO] saved logits to {output_path}") + + def create_file_stream(self): + + stream, sample_rate = sf.read(self.opt.asr_wav) # [T*sample_rate,] float64 + stream = stream.astype(np.float32) + + if stream.ndim > 1: + print(f'[WARN] audio has {stream.shape[1]} channels, only use the first.') + stream = stream[:, 0] + + if sample_rate != self.sample_rate: + print(f'[WARN] audio sample rate is {sample_rate}, resampling into {self.sample_rate}.') + stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self.sample_rate) + + print(f'[INFO] loaded audio stream {self.opt.asr_wav}: {stream.shape}') + + return stream + + + def create_pyaudio_stream(self): + + import pyaudio + + print(f'[INFO] creating live audio stream ...') + + audio = pyaudio.PyAudio() + + # get devices + info = audio.get_host_api_info_by_index(0) + n_devices = info.get('deviceCount') + + for i in range(0, n_devices): + if (audio.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0: + name = audio.get_device_info_by_host_api_device_index(0, i).get('name') + print(f'[INFO] choose audio device {name}, id {i}') + break + + # get stream + stream = audio.open(input_device_index=i, + format=pyaudio.paInt16, + channels=1, + rate=self.sample_rate, + input=True, + frames_per_buffer=self.chunk) + + return audio, stream + + + def get_audio_frame(self): + + if self.mode == 'file': + + if self.idx < self.file_stream.shape[0]: + frame = self.file_stream[self.idx: self.idx + self.chunk] + self.idx = self.idx + self.chunk + return frame + else: + return None + + else: + + frame = self.queue.get() + # print(f'[INFO] get frame {frame.shape}') + + self.idx = self.idx + self.chunk + + return frame + + + def frame_to_text(self, frame): + # frame: [N * 320], N = (context_size + 2 * stride_size) + + inputs = self.processor(frame, sampling_rate=self.sample_rate, return_tensors="pt", padding=True) + + with torch.no_grad(): + result = self.model(inputs.input_values.to(self.device)) + logits = result.logits # [1, N - 1, 32] + + # cut off stride + left = max(0, self.stride_left_size) + right = min(logits.shape[1], logits.shape[1] - self.stride_right_size + 1) # +1 to make sure output is the same length as input. + + # do not cut right if terminated. 
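+        # (at termination the right-stride frames are the tail of the audio and no
+        # later chunk will cover them, so they are kept instead of being discarded)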
+ if self.terminated: + right = logits.shape[1] + + logits = logits[:, left:right] + + # print(frame.shape, inputs.input_values.shape, logits.shape) + + predicted_ids = torch.argmax(logits, dim=-1) + transcription = self.processor.batch_decode(predicted_ids)[0].lower() + + + # for esperanto + # labels = np.array(['ŭ', '»', 'c', 'ĵ', 'ñ', '”', '„', '“', 'ǔ', 'o', 'ĝ', 'm', 'k', 'd', 'a', 'ŝ', 'z', 'i', '«', '—', '‘', 'ĥ', 'f', 'y', 'h', 'j', '|', 'r', 'u', 'ĉ', 's', '–', 'fi', 'l', 'p', '’', 'g', 'v', 't', 'b', 'n', 'e', '[UNK]', '[PAD]']) + + # labels = np.array([' ', ' ', ' ', '-', '|', 'E', 'T', 'A', 'O', 'N', 'I', 'H', 'S', 'R', 'D', 'L', 'U', 'M', 'W', 'C', 'F', 'G', 'Y', 'P', 'B', 'V', 'K', "'", 'X', 'J', 'Q', 'Z']) + # print(''.join(labels[predicted_ids[0].detach().cpu().long().numpy()])) + # print(predicted_ids[0]) + # print(transcription) + + return logits[0], predicted_ids[0], transcription # [N,] + + + def run(self): + + self.listen() + + while not self.terminated: + self.run_step() + + def clear_queue(self): + # clear the queue, to reduce potential latency... + print(f'[INFO] clear queue') + if self.mode == 'live': + self.queue.queue.clear() + if self.play: + self.output_queue.queue.clear() + + def warm_up(self): + + self.listen() + + print(f'[INFO] warm up ASR live model, expected latency = {self.warm_up_steps / self.fps:.6f}s') + t = time.time() + for _ in range(self.warm_up_steps): + self.run_step() + if torch.cuda.is_available(): + torch.cuda.synchronize() + t = time.time() - t + print(f'[INFO] warm-up done, actual latency = {t:.6f}s') + + self.clear_queue() + + + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--wav', type=str, default='') + parser.add_argument('--play', action='store_true', help="play out the audio") + + parser.add_argument('--model', type=str, default='cpierse/wav2vec2-large-xlsr-53-esperanto') + # parser.add_argument('--model', type=str, default='facebook/wav2vec2-large-960h-lv60-self') + + parser.add_argument('--save_feats', action='store_true') + # audio FPS + parser.add_argument('--fps', type=int, default=50) + # sliding window left-middle-right length. + parser.add_argument('-l', type=int, default=10) + parser.add_argument('-m', type=int, default=50) + parser.add_argument('-r', type=int, default=10) + + opt = parser.parse_args() + + # fix + opt.asr_wav = opt.wav + opt.asr_play = opt.play + opt.asr_model = opt.model + opt.asr_save_feats = opt.save_feats + + if 'deepspeech' in opt.asr_model: + raise ValueError("DeepSpeech features should not use this code to extract...") + + with ASR(opt) as asr: + asr.run() \ No newline at end of file diff --git a/nerf_triplane/gui.py b/nerf_triplane/gui.py new file mode 100644 index 0000000..7a6798d --- /dev/null +++ b/nerf_triplane/gui.py @@ -0,0 +1,565 @@ +import math +import torch +import numpy as np +import dearpygui.dearpygui as dpg +from scipy.spatial.transform import Rotation as R + +from .utils import * + +from .asr import ASR + + +class OrbitCamera: + def __init__(self, W, H, r=2, fovy=60): + self.W = W + self.H = H + self.radius = r # camera distance from center + self.fovy = fovy # in degree + self.center = np.array([0, 0, 0], dtype=np.float32) # look at this point + self.rot = R.from_matrix([[0, -1, 0], [0, 0, -1], [1, 0, 0]]) # init camera matrix: [[1, 0, 0], [0, -1, 0], [0, 0, 1]] (to suit ngp convention) + self.up = np.array([1, 0, 0], dtype=np.float32) # need to be normalized! 
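+    # the camera-to-world pose below is composed as rot @ translate(0, 0, -radius)
+    # and then shifted by -center, i.e. an orbit camera looking at self.center from
+    # distance self.radius; update_pose() inverts exactly this decomposition
+    # (assuming center stays at the origin) to recover radius and rotation from a
+    # dataset pose.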
+ + # pose + @property + def pose(self): + # first move camera to radius + res = np.eye(4, dtype=np.float32) + res[2, 3] -= self.radius + # rotate + rot = np.eye(4, dtype=np.float32) + rot[:3, :3] = self.rot.as_matrix() + res = rot @ res + # translate + res[:3, 3] -= self.center + return res + + def update_pose(self, pose): + # pose: [4, 4] numpy array + # assert self.center is 0 + self.radius = np.linalg.norm(pose[:3, 3]) + T = np.eye(4) + T[2, 3] = -self.radius + rot = pose @ np.linalg.inv(T) + self.rot = R.from_matrix(rot[:3, :3]) + + def update_intrinsics(self, intrinsics): + fl_x, fl_y, cx, cy = intrinsics + self.W = int(cx * 2) + self.H = int(cy * 2) + self.fovy = np.rad2deg(2 * np.arctan2(self.H, 2 * fl_y)) + + # intrinsics + @property + def intrinsics(self): + focal = self.H / (2 * np.tan(np.deg2rad(self.fovy) / 2)) + return np.array([focal, focal, self.W // 2, self.H // 2]) + + def orbit(self, dx, dy): + # rotate along camera up/side axis! + side = self.rot.as_matrix()[:3, 0] # why this is side --> ? # already normalized. + rotvec_x = self.up * np.radians(-0.01 * dx) + rotvec_y = side * np.radians(-0.01 * dy) + self.rot = R.from_rotvec(rotvec_x) * R.from_rotvec(rotvec_y) * self.rot + + def scale(self, delta): + self.radius *= 1.1 ** (-delta) + + def pan(self, dx, dy, dz=0): + # pan in camera coordinate system (careful on the sensitivity!) + self.center += 0.0001 * self.rot.as_matrix()[:3, :3] @ np.array([dx, dy, dz]) + + +class NeRFGUI: + def __init__(self, opt, trainer, data_loader, debug=True): + self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters. + self.W = opt.W + self.H = opt.H + self.cam = OrbitCamera(opt.W, opt.H, r=opt.radius, fovy=opt.fovy) + self.debug = debug + self.training = False + self.step = 0 # training step + + self.trainer = trainer + self.data_loader = data_loader + + # override with dataloader's intrinsics + self.W = data_loader._data.W + self.H = data_loader._data.H + self.cam.update_intrinsics(data_loader._data.intrinsics) + + # use dataloader's pose + pose_init = data_loader._data.poses[0] + self.cam.update_pose(pose_init.detach().cpu().numpy()) + + # use dataloader's bg + bg_img = data_loader._data.bg_img #.view(1, -1, 3) + if self.H != bg_img.shape[0] or self.W != bg_img.shape[1]: + bg_img = F.interpolate(bg_img.permute(2, 0, 1).unsqueeze(0).contiguous(), (self.H, self.W), mode='bilinear').squeeze(0).permute(1, 2, 0).contiguous() + self.bg_color = bg_img.view(1, -1, 3) + + # audio features (from dataloader, only used in non-playing mode) + self.audio_features = data_loader._data.auds # [N, 29, 16] + self.audio_idx = 0 + + # control eye + self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item() + + # playing seq from dataloader, or pause. + self.playing = False + self.loader = iter(data_loader) + + self.render_buffer = np.zeros((self.W, self.H, 3), dtype=np.float32) + self.need_update = True # camera moved, should reset accumulation + self.spp = 1 # sample per pixel + self.mode = 'image' # choose from ['image', 'depth'] + + self.dynamic_resolution = False # assert False! 
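+        # dynamic resolution stays off for the talking-head GUI: frames are always
+        # rendered at the dataset resolution (downscale = 1), and the corresponding
+        # checkbox in register_dpg() is left commented out.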
+ self.downscale = 1 + self.train_steps = 16 + + self.ind_index = 0 + self.ind_num = trainer.model.individual_codes.shape[0] + + # build asr + if self.opt.asr: + self.asr = ASR(opt) + + dpg.create_context() + self.register_dpg() + self.test_step() + + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + if self.opt.asr: + self.asr.stop() + dpg.destroy_context() + + def train_step(self): + + starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True) + starter.record() + + outputs = self.trainer.train_gui(self.data_loader, step=self.train_steps) + + ender.record() + torch.cuda.synchronize() + t = starter.elapsed_time(ender) + + self.step += self.train_steps + self.need_update = True + + dpg.set_value("_log_train_time", f'{t:.4f}ms ({int(1000/t)} FPS)') + dpg.set_value("_log_train_log", f'step = {self.step: 5d} (+{self.train_steps: 2d}), loss = {outputs["loss"]:.4f}, lr = {outputs["lr"]:.5f}') + + # dynamic train steps + # max allowed train time per-frame is 500 ms + full_t = t / self.train_steps * 16 + train_steps = min(16, max(4, int(16 * 500 / full_t))) + if train_steps > self.train_steps * 1.2 or train_steps < self.train_steps * 0.8: + self.train_steps = train_steps + + def prepare_buffer(self, outputs): + if self.mode == 'image': + return outputs['image'] + else: + return np.expand_dims(outputs['depth'], -1).repeat(3, -1) + + def test_step(self): + + if self.need_update or self.spp < self.opt.max_spp: + + starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True) + starter.record() + + if self.playing: + try: + data = next(self.loader) + except StopIteration: + self.loader = iter(self.data_loader) + data = next(self.loader) + + if self.opt.asr: + # use the live audio stream + data['auds'] = self.asr.get_next_feat() + + outputs = self.trainer.test_gui_with_data(data, self.W, self.H) + + # sync local camera pose + self.cam.update_pose(data['poses_matrix'][0].detach().cpu().numpy()) + + else: + if self.audio_features is not None: + auds = get_audio_features(self.audio_features, self.opt.att, self.audio_idx) + else: + auds = None + outputs = self.trainer.test_gui(self.cam.pose, self.cam.intrinsics, self.W, self.H, auds, self.eye_area, self.ind_index, self.bg_color, self.spp, self.downscale) + + ender.record() + torch.cuda.synchronize() + t = starter.elapsed_time(ender) + + # update dynamic resolution + if self.dynamic_resolution: + # max allowed infer time per-frame is 200 ms + full_t = t / (self.downscale ** 2) + downscale = min(1, max(1/4, math.sqrt(200 / full_t))) + if downscale > self.downscale * 1.2 or downscale < self.downscale * 0.8: + self.downscale = downscale + + if self.need_update: + self.render_buffer = self.prepare_buffer(outputs) + self.spp = 1 + self.need_update = False + else: + self.render_buffer = (self.render_buffer * self.spp + self.prepare_buffer(outputs)) / (self.spp + 1) + self.spp += 1 + + if self.playing: + self.need_update = True + + dpg.set_value("_log_infer_time", f'{t:.4f}ms ({int(1000/t)} FPS)') + dpg.set_value("_log_resolution", f'{int(self.downscale * self.W)}x{int(self.downscale * self.H)}') + dpg.set_value("_log_spp", self.spp) + dpg.set_value("_texture", self.render_buffer) + + + def register_dpg(self): + + ### register texture + + with dpg.texture_registry(show=False): + dpg.add_raw_texture(self.W, self.H, self.render_buffer, format=dpg.mvFormat_Float_rgb, tag="_texture") + + ### register window + + # the rendered image, as the primary window + with 
dpg.window(tag="_primary_window", width=self.W, height=self.H): + + # add the texture + dpg.add_image("_texture") + + # dpg.set_primary_window("_primary_window", True) + + dpg.show_tool(dpg.mvTool_Metrics) + + # control window + with dpg.window(label="Control", tag="_control_window", width=400, height=300): + + # button theme + with dpg.theme() as theme_button: + with dpg.theme_component(dpg.mvButton): + dpg.add_theme_color(dpg.mvThemeCol_Button, (23, 3, 18)) + dpg.add_theme_color(dpg.mvThemeCol_ButtonHovered, (51, 3, 47)) + dpg.add_theme_color(dpg.mvThemeCol_ButtonActive, (83, 18, 83)) + dpg.add_theme_style(dpg.mvStyleVar_FrameRounding, 5) + dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 3, 3) + + # time + if not self.opt.test: + with dpg.group(horizontal=True): + dpg.add_text("Train time: ") + dpg.add_text("no data", tag="_log_train_time") + + with dpg.group(horizontal=True): + dpg.add_text("Infer time: ") + dpg.add_text("no data", tag="_log_infer_time") + + with dpg.group(horizontal=True): + dpg.add_text("SPP: ") + dpg.add_text("1", tag="_log_spp") + + # train button + if not self.opt.test: + with dpg.collapsing_header(label="Train", default_open=True): + + # train / stop + with dpg.group(horizontal=True): + dpg.add_text("Train: ") + + def callback_train(sender, app_data): + if self.training: + self.training = False + dpg.configure_item("_button_train", label="start") + else: + self.training = True + dpg.configure_item("_button_train", label="stop") + + dpg.add_button(label="start", tag="_button_train", callback=callback_train) + dpg.bind_item_theme("_button_train", theme_button) + + def callback_reset(sender, app_data): + @torch.no_grad() + def weight_reset(m: nn.Module): + reset_parameters = getattr(m, "reset_parameters", None) + if callable(reset_parameters): + m.reset_parameters() + self.trainer.model.apply(fn=weight_reset) + self.trainer.model.reset_extra_state() # for cuda_ray density_grid and step_counter + self.need_update = True + + dpg.add_button(label="reset", tag="_button_reset", callback=callback_reset) + dpg.bind_item_theme("_button_reset", theme_button) + + # save ckpt + with dpg.group(horizontal=True): + dpg.add_text("Checkpoint: ") + + def callback_save(sender, app_data): + self.trainer.save_checkpoint(full=True, best=False) + dpg.set_value("_log_ckpt", "saved " + os.path.basename(self.trainer.stats["checkpoints"][-1])) + self.trainer.epoch += 1 # use epoch to indicate different calls. + + dpg.add_button(label="save", tag="_button_save", callback=callback_save) + dpg.bind_item_theme("_button_save", theme_button) + + dpg.add_text("", tag="_log_ckpt") + + # save mesh + with dpg.group(horizontal=True): + dpg.add_text("Marching Cubes: ") + + def callback_mesh(sender, app_data): + self.trainer.save_mesh(resolution=256, threshold=10) + dpg.set_value("_log_mesh", "saved " + f'{self.trainer.name}_{self.trainer.epoch}.ply') + self.trainer.epoch += 1 # use epoch to indicate different calls. 
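+                        # (Trainer.save_mesh runs marching cubes over the density
+                        # field, matching the 'Marching Cubes' label above, and
+                        # writes a .ply into the workspace)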
+ + dpg.add_button(label="mesh", tag="_button_mesh", callback=callback_mesh) + dpg.bind_item_theme("_button_mesh", theme_button) + + dpg.add_text("", tag="_log_mesh") + + with dpg.group(horizontal=True): + dpg.add_text("", tag="_log_train_log") + + + # rendering options + with dpg.collapsing_header(label="Options", default_open=True): + + # playing + with dpg.group(horizontal=True): + dpg.add_text("Play: ") + + def callback_play(sender, app_data): + + if self.playing: + self.playing = False + dpg.configure_item("_button_play", label="start") + else: + self.playing = True + dpg.configure_item("_button_play", label="stop") + if self.opt.asr: + self.asr.warm_up() + self.need_update = True + + dpg.add_button(label="start", tag="_button_play", callback=callback_play) + dpg.bind_item_theme("_button_play", theme_button) + + # set asr + if self.opt.asr: + + # clear queue button + def callback_clear_queue(sender, app_data): + + self.asr.clear_queue() + self.need_update = True + + dpg.add_button(label="clear", tag="_button_clear_queue", callback=callback_clear_queue) + dpg.bind_item_theme("_button_clear_queue", theme_button) + + # dynamic rendering resolution + with dpg.group(horizontal=True): + + def callback_set_dynamic_resolution(sender, app_data): + if self.dynamic_resolution: + self.dynamic_resolution = False + self.downscale = 1 + else: + self.dynamic_resolution = True + self.need_update = True + + # Disable dynamic resolution for face. + # dpg.add_checkbox(label="dynamic resolution", default_value=self.dynamic_resolution, callback=callback_set_dynamic_resolution) + dpg.add_text(f"{self.W}x{self.H}", tag="_log_resolution") + + # mode combo + def callback_change_mode(sender, app_data): + self.mode = app_data + self.need_update = True + + dpg.add_combo(('image', 'depth'), label='mode', default_value=self.mode, callback=callback_change_mode) + + + # bg_color picker + def callback_change_bg(sender, app_data): + self.bg_color = torch.tensor(app_data[:3], dtype=torch.float32) # only need RGB in [0, 1] + self.need_update = True + + dpg.add_color_edit((255, 255, 255), label="Background Color", width=200, tag="_color_editor", no_alpha=True, callback=callback_change_bg) + + # audio index slider + if not self.opt.asr: + def callback_set_audio_index(sender, app_data): + self.audio_idx = app_data + self.need_update = True + + dpg.add_slider_int(label="Audio", min_value=0, max_value=self.audio_features.shape[0] - 1, format="%d", default_value=self.audio_idx, callback=callback_set_audio_index) + + # ind code index slider + if self.opt.ind_dim > 0: + def callback_set_individual_code(sender, app_data): + self.ind_index = app_data + self.need_update = True + + dpg.add_slider_int(label="Individual", min_value=0, max_value=self.ind_num - 1, format="%d", default_value=self.ind_index, callback=callback_set_individual_code) + + # eye area slider + if self.opt.exp_eye: + def callback_set_eye(sender, app_data): + self.eye_area = app_data + self.need_update = True + + dpg.add_slider_float(label="eye area", min_value=0, max_value=0.5, format="%.2f percent", default_value=self.eye_area, callback=callback_set_eye) + + # fov slider + def callback_set_fovy(sender, app_data): + self.cam.fovy = app_data + self.need_update = True + + dpg.add_slider_int(label="FoV (vertical)", min_value=1, max_value=120, format="%d deg", default_value=self.cam.fovy, callback=callback_set_fovy) + + # dt_gamma slider + def callback_set_dt_gamma(sender, app_data): + self.opt.dt_gamma = app_data + self.need_update = True + + 
dpg.add_slider_float(label="dt_gamma", min_value=0, max_value=0.1, format="%.5f", default_value=self.opt.dt_gamma, callback=callback_set_dt_gamma) + + # max_steps slider + def callback_set_max_steps(sender, app_data): + self.opt.max_steps = app_data + self.need_update = True + + dpg.add_slider_int(label="max steps", min_value=1, max_value=1024, format="%d", default_value=self.opt.max_steps, callback=callback_set_max_steps) + + # aabb slider + def callback_set_aabb(sender, app_data, user_data): + # user_data is the dimension for aabb (xmin, ymin, zmin, xmax, ymax, zmax) + self.trainer.model.aabb_infer[user_data] = app_data + + # also change train aabb ? [better not...] + #self.trainer.model.aabb_train[user_data] = app_data + + self.need_update = True + + dpg.add_separator() + dpg.add_text("Axis-aligned bounding box:") + + with dpg.group(horizontal=True): + dpg.add_slider_float(label="x", width=150, min_value=-self.opt.bound, max_value=0, format="%.2f", default_value=-self.opt.bound, callback=callback_set_aabb, user_data=0) + dpg.add_slider_float(label="", width=150, min_value=0, max_value=self.opt.bound, format="%.2f", default_value=self.opt.bound, callback=callback_set_aabb, user_data=3) + + with dpg.group(horizontal=True): + dpg.add_slider_float(label="y", width=150, min_value=-self.opt.bound, max_value=0, format="%.2f", default_value=-self.opt.bound, callback=callback_set_aabb, user_data=1) + dpg.add_slider_float(label="", width=150, min_value=0, max_value=self.opt.bound, format="%.2f", default_value=self.opt.bound, callback=callback_set_aabb, user_data=4) + + with dpg.group(horizontal=True): + dpg.add_slider_float(label="z", width=150, min_value=-self.opt.bound, max_value=0, format="%.2f", default_value=-self.opt.bound, callback=callback_set_aabb, user_data=2) + dpg.add_slider_float(label="", width=150, min_value=0, max_value=self.opt.bound, format="%.2f", default_value=self.opt.bound, callback=callback_set_aabb, user_data=5) + + + # debug info + if self.debug: + with dpg.collapsing_header(label="Debug"): + # pose + dpg.add_separator() + dpg.add_text("Camera Pose:") + dpg.add_text(str(self.cam.pose), tag="_log_pose") + + + ### register camera handler + + def callback_camera_drag_rotate(sender, app_data): + + if not dpg.is_item_focused("_primary_window"): + return + + dx = app_data[1] + dy = app_data[2] + + self.cam.orbit(dx, dy) + self.need_update = True + + if self.debug: + dpg.set_value("_log_pose", str(self.cam.pose)) + + + def callback_camera_wheel_scale(sender, app_data): + + if not dpg.is_item_focused("_primary_window"): + return + + delta = app_data + + self.cam.scale(delta) + self.need_update = True + + if self.debug: + dpg.set_value("_log_pose", str(self.cam.pose)) + + + def callback_camera_drag_pan(sender, app_data): + + if not dpg.is_item_focused("_primary_window"): + return + + dx = app_data[1] + dy = app_data[2] + + self.cam.pan(dx, dy) + self.need_update = True + + if self.debug: + dpg.set_value("_log_pose", str(self.cam.pose)) + + + with dpg.handler_registry(): + dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Left, callback=callback_camera_drag_rotate) + dpg.add_mouse_wheel_handler(callback=callback_camera_wheel_scale) + dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Middle, callback=callback_camera_drag_pan) + + + dpg.create_viewport(title='RAD-NeRF', width=1080, height=720, resizable=True) + + ### global theme + with dpg.theme() as theme_no_padding: + with dpg.theme_component(dpg.mvAll): + # set all padding to 0 to avoid scroll bar + 
dpg.add_theme_style(dpg.mvStyleVar_WindowPadding, 0, 0, category=dpg.mvThemeCat_Core) + dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 0, 0, category=dpg.mvThemeCat_Core) + dpg.add_theme_style(dpg.mvStyleVar_CellPadding, 0, 0, category=dpg.mvThemeCat_Core) + + dpg.bind_item_theme("_primary_window", theme_no_padding) + + dpg.setup_dearpygui() + + #dpg.show_metrics() + + dpg.show_viewport() + + + def render(self): + + while dpg.is_dearpygui_running(): + # update texture every frame + if self.training: + self.train_step() + # audio stream thread... + if self.opt.asr and self.playing: + # run 2 ASR steps (audio is at 50FPS, video is at 25FPS) + for _ in range(2): + self.asr.run_step() + self.test_step() + dpg.render_dearpygui_frame() \ No newline at end of file diff --git a/nerf_triplane/network.py b/nerf_triplane/network.py new file mode 100644 index 0000000..fc41359 --- /dev/null +++ b/nerf_triplane/network.py @@ -0,0 +1,352 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from encoding import get_encoder +from .renderer import NeRFRenderer + +# Audio feature extractor +class AudioAttNet(nn.Module): + def __init__(self, dim_aud=64, seq_len=8): + super(AudioAttNet, self).__init__() + self.seq_len = seq_len + self.dim_aud = dim_aud + self.attentionConvNet = nn.Sequential( # b x subspace_dim x seq_len + nn.Conv1d(self.dim_aud, 16, kernel_size=3, stride=1, padding=1, bias=True), + nn.LeakyReLU(0.02, True), + nn.Conv1d(16, 8, kernel_size=3, stride=1, padding=1, bias=True), + nn.LeakyReLU(0.02, True), + nn.Conv1d(8, 4, kernel_size=3, stride=1, padding=1, bias=True), + nn.LeakyReLU(0.02, True), + nn.Conv1d(4, 2, kernel_size=3, stride=1, padding=1, bias=True), + nn.LeakyReLU(0.02, True), + nn.Conv1d(2, 1, kernel_size=3, stride=1, padding=1, bias=True), + nn.LeakyReLU(0.02, True) + ) + self.attentionNet = nn.Sequential( + nn.Linear(in_features=self.seq_len, out_features=self.seq_len, bias=True), + nn.Softmax(dim=1) + ) + + def forward(self, x): + # x: [1, seq_len, dim_aud] + y = x.permute(0, 2, 1) # [1, dim_aud, seq_len] + y = self.attentionConvNet(y) + y = self.attentionNet(y.view(1, self.seq_len)).view(1, self.seq_len, 1) + return torch.sum(y * x, dim=1) # [1, dim_aud] + + +# Audio feature extractor +class AudioNet(nn.Module): + def __init__(self, dim_in=29, dim_aud=64, win_size=16): + super(AudioNet, self).__init__() + self.win_size = win_size + self.dim_aud = dim_aud + self.encoder_conv = nn.Sequential( # n x 29 x 16 + nn.Conv1d(dim_in, 32, kernel_size=3, stride=2, padding=1, bias=True), # n x 32 x 8 + nn.LeakyReLU(0.02, True), + nn.Conv1d(32, 32, kernel_size=3, stride=2, padding=1, bias=True), # n x 32 x 4 + nn.LeakyReLU(0.02, True), + nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1, bias=True), # n x 64 x 2 + nn.LeakyReLU(0.02, True), + nn.Conv1d(64, 64, kernel_size=3, stride=2, padding=1, bias=True), # n x 64 x 1 + nn.LeakyReLU(0.02, True), + ) + self.encoder_fc1 = nn.Sequential( + nn.Linear(64, 64), + nn.LeakyReLU(0.02, True), + nn.Linear(64, dim_aud), + ) + + def forward(self, x): + half_w = int(self.win_size/2) + x = x[:, :, 8-half_w:8+half_w] + x = self.encoder_conv(x).squeeze(-1) + x = self.encoder_fc1(x) + return x + + +class MLP(nn.Module): + def __init__(self, dim_in, dim_out, dim_hidden, num_layers): + super().__init__() + self.dim_in = dim_in + self.dim_out = dim_out + self.dim_hidden = dim_hidden + self.num_layers = num_layers + + net = [] + for l in range(num_layers): + net.append(nn.Linear(self.dim_in if l == 0 else self.dim_hidden, self.dim_out 
if l == num_layers - 1 else self.dim_hidden, bias=False)) + + self.net = nn.ModuleList(net) + + def forward(self, x): + for l in range(self.num_layers): + x = self.net[l](x) + if l != self.num_layers - 1: + x = F.relu(x, inplace=True) + # x = F.dropout(x, p=0.1, training=self.training) + + return x + + +class NeRFNetwork(NeRFRenderer): + def __init__(self, + opt, + # torso net (hard coded for now) + ): + super().__init__(opt) + + # audio embedding + self.emb = self.opt.emb + + if 'esperanto' in self.opt.asr_model: + self.audio_in_dim = 44 + elif 'deepspeech' in self.opt.asr_model: + self.audio_in_dim = 29 + else: + self.audio_in_dim = 32 + + if self.emb: + self.embedding = nn.Embedding(self.audio_in_dim, self.audio_in_dim) + + # audio network + audio_dim = 32 + self.audio_dim = audio_dim + self.audio_net = AudioNet(self.audio_in_dim, self.audio_dim) + + self.att = self.opt.att + if self.att > 0: + self.audio_att_net = AudioAttNet(self.audio_dim) + + # DYNAMIC PART + self.num_levels = 12 + self.level_dim = 1 + self.encoder_xy, self.in_dim_xy = get_encoder('hashgrid', input_dim=2, num_levels=self.num_levels, level_dim=self.level_dim, base_resolution=64, log2_hashmap_size=14, desired_resolution=512 * self.bound) + self.encoder_yz, self.in_dim_yz = get_encoder('hashgrid', input_dim=2, num_levels=self.num_levels, level_dim=self.level_dim, base_resolution=64, log2_hashmap_size=14, desired_resolution=512 * self.bound) + self.encoder_xz, self.in_dim_xz = get_encoder('hashgrid', input_dim=2, num_levels=self.num_levels, level_dim=self.level_dim, base_resolution=64, log2_hashmap_size=14, desired_resolution=512 * self.bound) + + self.in_dim = self.in_dim_xy + self.in_dim_yz + self.in_dim_xz + + ## sigma network + self.num_layers = 3 + self.hidden_dim = 64 + self.geo_feat_dim = 64 + self.eye_att_net = MLP(self.in_dim, 1, 16, 2) + self.eye_dim = 1 if self.exp_eye else 0 + self.sigma_net = MLP(self.in_dim + self.audio_dim + self.eye_dim, 1 + self.geo_feat_dim, self.hidden_dim, self.num_layers) + ## color network + self.num_layers_color = 2 + self.hidden_dim_color = 64 + self.encoder_dir, self.in_dim_dir = get_encoder('spherical_harmonics') + self.color_net = MLP(self.in_dim_dir + self.geo_feat_dim + self.individual_dim, 3, self.hidden_dim_color, self.num_layers_color) + + self.unc_net = MLP(self.in_dim, 1, 32, 2) + + self.aud_ch_att_net = MLP(self.in_dim, self.audio_dim, 64, 2) + + self.testing = False + + if self.torso: + # torso deform network + self.register_parameter('anchor_points', + nn.Parameter(torch.tensor([[0.01, 0.01, 0.1, 1], [-0.1, -0.1, 0.1, 1], [0.1, -0.1, 0.1, 1]]))) + self.torso_deform_encoder, self.torso_deform_in_dim = get_encoder('frequency', input_dim=2, multires=8) + # self.torso_deform_encoder, self.torso_deform_in_dim = get_encoder('tiledgrid', input_dim=2, num_levels=16, level_dim=1, base_resolution=16, log2_hashmap_size=16, desired_resolution=512) + self.anchor_encoder, self.anchor_in_dim = get_encoder('frequency', input_dim=6, multires=3) + self.torso_deform_net = MLP(self.torso_deform_in_dim + self.anchor_in_dim + self.individual_dim_torso, 2, 32, 3) + + # torso color network + self.torso_encoder, self.torso_in_dim = get_encoder('tiledgrid', input_dim=2, num_levels=16, level_dim=2, base_resolution=16, log2_hashmap_size=16, desired_resolution=2048) + self.torso_net = MLP(self.torso_in_dim + self.torso_deform_in_dim + self.anchor_in_dim + self.individual_dim_torso, 4, 32, 3) + + + def forward_torso(self, x, poses, c=None): + # x: [N, 2] in [-1, 1] + # head poses: [1, 4, 4] + # 
c: [1, ind_dim], individual code + + # test: shrink x + x = x * self.opt.torso_shrink + + # deformation-based + wrapped_anchor = self.anchor_points[None, ...] @ poses.permute(0, 2, 1).inverse() + wrapped_anchor = (wrapped_anchor[:, :, :2] / wrapped_anchor[:, :, 3, None] / wrapped_anchor[:, :, 2, None]).view(1, -1) + # print(wrapped_anchor) + # enc_pose = self.pose_encoder(poses) + enc_anchor = self.anchor_encoder(wrapped_anchor) + enc_x = self.torso_deform_encoder(x) + + if c is not None: + h = torch.cat([enc_x, enc_anchor.repeat(x.shape[0], 1), c.repeat(x.shape[0], 1)], dim=-1) + else: + h = torch.cat([enc_x, enc_anchor.repeat(x.shape[0], 1)], dim=-1) + + dx = self.torso_deform_net(h) + + x = (x + dx).clamp(-1, 1) + + x = self.torso_encoder(x, bound=1) + + # h = torch.cat([x, h, enc_a.repeat(x.shape[0], 1)], dim=-1) + h = torch.cat([x, h], dim=-1) + + h = self.torso_net(h) + + alpha = torch.sigmoid(h[..., :1])*(1 + 2*0.001) - 0.001 + color = torch.sigmoid(h[..., 1:])*(1 + 2*0.001) - 0.001 + + return alpha, color, dx + + + @staticmethod + @torch.jit.script + def split_xyz(x): + xy, yz, xz = x[:, :-1], x[:, 1:], torch.cat([x[:,:1], x[:,-1:]], dim=-1) + return xy, yz, xz + + + def encode_x(self, xyz, bound): + # x: [N, 3], in [-bound, bound] + N, M = xyz.shape + xy, yz, xz = self.split_xyz(xyz) + feat_xy = self.encoder_xy(xy, bound=bound) + feat_yz = self.encoder_yz(yz, bound=bound) + feat_xz = self.encoder_xz(xz, bound=bound) + + return torch.cat([feat_xy, feat_yz, feat_xz], dim=-1) + + + def encode_audio(self, a): + # a: [1, 29, 16] or [8, 29, 16], audio features from deepspeech + # if emb, a should be: [1, 16] or [8, 16] + + # fix audio traininig + if a is None: return None + + if self.emb: + a = self.embedding(a).transpose(-1, -2).contiguous() # [1/8, 29, 16] + + enc_a = self.audio_net(a) # [1/8, 64] + + if self.att > 0: + enc_a = self.audio_att_net(enc_a.unsqueeze(0)) # [1, 64] + + return enc_a + + + def predict_uncertainty(self, unc_inp): + if self.testing or not self.opt.unc_loss: + unc = torch.zeros_like(unc_inp) + else: + unc = self.unc_net(unc_inp.detach()) + + return unc + + + def forward(self, x, d, enc_a, c, e=None): + # x: [N, 3], in [-bound, bound] + # d: [N, 3], nomalized in [-1, 1] + # enc_a: [1, aud_dim] + # c: [1, ind_dim], individual code + # e: [1, 1], eye feature + enc_x = self.encode_x(x, bound=self.bound) + + sigma_result = self.density(x, enc_a, e, enc_x) + sigma = sigma_result['sigma'] + geo_feat = sigma_result['geo_feat'] + aud_ch_att = sigma_result['ambient_aud'] + eye_att = sigma_result['ambient_eye'] + + # color + enc_d = self.encoder_dir(d) + + if c is not None: + h = torch.cat([enc_d, geo_feat, c.repeat(x.shape[0], 1)], dim=-1) + else: + h = torch.cat([enc_d, geo_feat], dim=-1) + + h_color = self.color_net(h) + color = torch.sigmoid(h_color)*(1 + 2*0.001) - 0.001 + + uncertainty = self.predict_uncertainty(enc_x) + uncertainty = torch.log(1 + torch.exp(uncertainty)) + + return sigma, color, aud_ch_att, eye_att, uncertainty[..., None] + + + def density(self, x, enc_a, e=None, enc_x=None): + # x: [N, 3], in [-bound, bound] + if enc_x is None: + enc_x = self.encode_x(x, bound=self.bound) + + enc_a = enc_a.repeat(enc_x.shape[0], 1) + aud_ch_att = self.aud_ch_att_net(enc_x) + enc_w = enc_a * aud_ch_att + + if e is not None: + # e = self.encoder_eye(e) + eye_att = torch.sigmoid(self.eye_att_net(enc_x)) + e = e * eye_att + # e = e.repeat(enc_x.shape[0], 1) + h = torch.cat([enc_x, enc_w, e], dim=-1) + else: + h = torch.cat([enc_x, enc_w], dim=-1) + + h = 
self.sigma_net(h) + + sigma = torch.exp(h[..., 0]) + geo_feat = h[..., 1:] + + return { + 'sigma': sigma, + 'geo_feat': geo_feat, + 'ambient_aud' : aud_ch_att.norm(dim=-1, keepdim=True), + 'ambient_eye' : eye_att, + } + + + # optimizer utils + def get_params(self, lr, lr_net, wd=0): + + # ONLY train torso + if self.torso: + params = [ + {'params': self.torso_encoder.parameters(), 'lr': lr}, + {'params': self.torso_deform_encoder.parameters(), 'lr': lr, 'weight_decay': wd}, + {'params': self.torso_net.parameters(), 'lr': lr_net, 'weight_decay': wd}, + {'params': self.torso_deform_net.parameters(), 'lr': lr_net, 'weight_decay': wd}, + {'params': self.anchor_points, 'lr': lr_net, 'weight_decay': wd} + ] + + if self.individual_dim_torso > 0: + params.append({'params': self.individual_codes_torso, 'lr': lr_net, 'weight_decay': wd}) + + return params + + params = [ + {'params': self.audio_net.parameters(), 'lr': lr_net, 'weight_decay': wd}, + + {'params': self.encoder_xy.parameters(), 'lr': lr}, + {'params': self.encoder_yz.parameters(), 'lr': lr}, + {'params': self.encoder_xz.parameters(), 'lr': lr}, + # {'params': self.encoder_xyz.parameters(), 'lr': lr}, + + {'params': self.sigma_net.parameters(), 'lr': lr_net, 'weight_decay': wd}, + {'params': self.color_net.parameters(), 'lr': lr_net, 'weight_decay': wd}, + ] + if self.att > 0: + params.append({'params': self.audio_att_net.parameters(), 'lr': lr_net * 5, 'weight_decay': 0.0001}) + if self.emb: + params.append({'params': self.embedding.parameters(), 'lr': lr}) + if self.individual_dim > 0: + params.append({'params': self.individual_codes, 'lr': lr_net, 'weight_decay': wd}) + if self.train_camera: + params.append({'params': self.camera_dT, 'lr': 1e-5, 'weight_decay': 0}) + params.append({'params': self.camera_dR, 'lr': 1e-5, 'weight_decay': 0}) + + params.append({'params': self.aud_ch_att_net.parameters(), 'lr': lr_net, 'weight_decay': wd}) + params.append({'params': self.unc_net.parameters(), 'lr': lr_net, 'weight_decay': wd}) + params.append({'params': self.eye_att_net.parameters(), 'lr': lr_net, 'weight_decay': wd}) + + return params \ No newline at end of file diff --git a/nerf_triplane/provider.py b/nerf_triplane/provider.py new file mode 100644 index 0000000..f61d92c --- /dev/null +++ b/nerf_triplane/provider.py @@ -0,0 +1,764 @@ +import os +import cv2 +import glob +import json +import tqdm +import numpy as np +from scipy.spatial.transform import Slerp, Rotation +import matplotlib.pyplot as plt + +import trimesh + +import torch +import torch.nn.functional as F +from torch.utils.data import DataLoader + +from .utils import get_audio_features, get_rays, get_bg_coords, convert_poses + +# ref: https://github.com/NVlabs/instant-ngp/blob/b76004c8cf478880227401ae763be4c02f80b62f/include/neural-graphics-primitives/nerf_loader.h#L50 +def nerf_matrix_to_ngp(pose, scale=0.33, offset=[0, 0, 0]): + new_pose = np.array([ + [pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale + offset[0]], + [pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale + offset[1]], + [pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale + offset[2]], + [0, 0, 0, 1], + ], dtype=np.float32) + return new_pose + + +def smooth_camera_path(poses, kernel_size=5): + # smooth the camera trajectory... 
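+    # (translations are box-filtered over a window of kernel_size frames, and the
+    # rotations in that window are averaged with scipy's Rotation.mean(), so the
+    # smoothed matrices remain valid rotations)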
+ # poses: [N, 4, 4], numpy array + + N = poses.shape[0] + K = kernel_size // 2 + + trans = poses[:, :3, 3].copy() # [N, 3] + rots = poses[:, :3, :3].copy() # [N, 3, 3] + + for i in range(N): + start = max(0, i - K) + end = min(N, i + K + 1) + poses[i, :3, 3] = trans[start:end].mean(0) + poses[i, :3, :3] = Rotation.from_matrix(rots[start:end]).mean().as_matrix() + + return poses + +def polygon_area(x, y): + x_ = x - x.mean() + y_ = y - y.mean() + correction = x_[-1] * y_[0] - y_[-1]* x_[0] + main_area = np.dot(x_[:-1], y_[1:]) - np.dot(y_[:-1], x_[1:]) + return 0.5 * np.abs(main_area + correction) + + +def visualize_poses(poses, size=0.1): + # poses: [B, 4, 4] + + print(f'[INFO] visualize poses: {poses.shape}') + + axes = trimesh.creation.axis(axis_length=4) + box = trimesh.primitives.Box(extents=(2, 2, 2)).as_outline() + box.colors = np.array([[128, 128, 128]] * len(box.entities)) + objects = [axes, box] + + for pose in poses: + # a camera is visualized with 8 line segments. + pos = pose[:3, 3] + a = pos + size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] + b = pos - size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] + c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] + d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] + + dir = (a + b + c + d) / 4 - pos + dir = dir / (np.linalg.norm(dir) + 1e-8) + o = pos + dir * 3 + + segs = np.array([[pos, a], [pos, b], [pos, c], [pos, d], [a, b], [b, c], [c, d], [d, a], [pos, o]]) + segs = trimesh.load_path(segs) + objects.append(segs) + + trimesh.Scene(objects).show() + + +class NeRFDataset_Test: + def __init__(self, opt, device, downscale=1): + super().__init__() + + self.opt = opt + self.device = device + self.downscale = downscale + self.scale = opt.scale # camera radius scale to make sure camera are inside the bounding box. + self.offset = opt.offset # camera offset + self.bound = opt.bound # bounding box half length, also used as the radius to random sample poses. + self.fp16 = opt.fp16 + + self.start_index = opt.data_range[0] + self.end_index = opt.data_range[1] + + self.training = False + self.num_rays = -1 + + # load nerf-compatible format data. + + with open(opt.pose, 'r') as f: + transform = json.load(f) + + # load image size + self.H = int(transform['cy']) * 2 // downscale + self.W = int(transform['cx']) * 2 // downscale + + # read images + frames = transform["frames"] + + # use a slice of the dataset + if self.end_index == -1: # abuse... 
+ self.end_index = len(frames) + + frames = frames[self.start_index:self.end_index] + + print(f'[INFO] load {len(frames)} frames.') + + # only load pre-calculated aud features when not live-streaming + if not self.opt.asr: + + aud_features = np.load(self.opt.aud) + + aud_features = torch.from_numpy(aud_features) + + # support both [N, 16] labels and [N, 16, K] logits + if len(aud_features.shape) == 3: + aud_features = aud_features.float().permute(0, 2, 1) # [N, 16, 29] --> [N, 29, 16] + + if self.opt.emb: + print(f'[INFO] argmax to aud features {aud_features.shape} for --emb mode') + aud_features = aud_features.argmax(1) # [N, 16] + + else: + assert self.opt.emb, "aud only provide labels, must use --emb" + aud_features = aud_features.long() + + print(f'[INFO] load {self.opt.aud} aud_features: {aud_features.shape}') + + self.poses = [] + self.auds = [] + self.eye_area = [] + + for f in tqdm.tqdm(frames, desc=f'Loading data'): + + pose = np.array(f['transform_matrix'], dtype=np.float32) # [4, 4] + pose = nerf_matrix_to_ngp(pose, scale=self.scale, offset=self.offset) + self.poses.append(pose) + + # find the corresponding audio to the image frame + if not self.opt.asr and self.opt.aud == '': + aud = aud_features[min(f['aud_id'], aud_features.shape[0] - 1)] # careful for the last frame... + self.auds.append(aud) + + if self.opt.exp_eye: + + if 'eye_ratio' in f: + area = f['eye_ratio'] + else: + area = 0.25 # default value for opened eye + + self.eye_area.append(area) + + # load pre-extracted background image (should be the same size as training image...) + + if self.opt.bg_img == 'white': # special + bg_img = np.ones((self.H, self.W, 3), dtype=np.float32) + elif self.opt.bg_img == 'black': # special + bg_img = np.zeros((self.H, self.W, 3), dtype=np.float32) + else: # load from file + bg_img = cv2.imread(self.opt.bg_img, cv2.IMREAD_UNCHANGED) # [H, W, 3] + if bg_img.shape[0] != self.H or bg_img.shape[1] != self.W: + bg_img = cv2.resize(bg_img, (self.W, self.H), interpolation=cv2.INTER_AREA) + bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB) + bg_img = bg_img.astype(np.float32) / 255 # [H, W, 3/4] + + self.bg_img = bg_img + + self.poses = np.stack(self.poses, axis=0) + + # smooth camera path... 
+ if self.opt.smooth_path: + self.poses = smooth_camera_path(self.poses, self.opt.smooth_path_window) + + self.poses = torch.from_numpy(self.poses) # [N, 4, 4] + + if self.opt.asr: + # live streaming, no pre-calculated auds + self.auds = None + else: + # auds corresponding to images + if self.opt.aud == '': + self.auds = torch.stack(self.auds, dim=0) # [N, 32, 16] + # auds is novel, may have a different length with images + else: + self.auds = aud_features + + self.bg_img = torch.from_numpy(self.bg_img) + + if self.opt.exp_eye: + self.eye_area = np.array(self.eye_area, dtype=np.float32) # [N] + print(f'[INFO] eye_area: {self.eye_area.min()} - {self.eye_area.max()}') + + if self.opt.smooth_eye: + + # naive 5 window average + ori_eye = self.eye_area.copy() + for i in range(ori_eye.shape[0]): + start = max(0, i - 1) + end = min(ori_eye.shape[0], i + 2) + self.eye_area[i] = ori_eye[start:end].mean() + + self.eye_area = torch.from_numpy(self.eye_area).view(-1, 1) # [N, 1] + + # always preload + self.poses = self.poses.to(self.device) + + if self.auds is not None: + self.auds = self.auds.to(self.device) + + self.bg_img = self.bg_img.to(torch.half).to(self.device) + + if self.opt.exp_eye: + self.eye_area = self.eye_area.to(self.device) + + # load intrinsics + + fl_x = fl_y = transform['focal_len'] + + cx = (transform['cx'] / downscale) + cy = (transform['cy'] / downscale) + + self.intrinsics = np.array([fl_x, fl_y, cx, cy]) + + # directly build the coordinate meshgrid in [-1, 1]^2 + self.bg_coords = get_bg_coords(self.H, self.W, self.device) # [1, H*W, 2] in [-1, 1] + + def mirror_index(self, index): + size = self.poses.shape[0] + turn = index // size + res = index % size + if turn % 2 == 0: + return res + else: + return size - res - 1 + + def collate(self, index): + + B = len(index) # a list of length 1 + # assert B == 1 + + results = {} + + # audio use the original index + if self.auds is not None: + auds = get_audio_features(self.auds, self.opt.att, index[0]).to(self.device) + results['auds'] = auds + + # head pose and bg image may mirror (replay --> <-- --> <--). + index[0] = self.mirror_index(index[0]) + + poses = self.poses[index].to(self.device) # [B, 4, 4] + + rays = get_rays(poses, self.intrinsics, self.H, self.W, self.num_rays, self.opt.patch_size) + + results['index'] = index # for ind. code + results['H'] = self.H + results['W'] = self.W + results['rays_o'] = rays['rays_o'] + results['rays_d'] = rays['rays_d'] + + if self.opt.exp_eye: + results['eye'] = self.eye_area[index].to(self.device) # [1] + else: + results['eye'] = None + + bg_img = self.bg_img.view(1, -1, 3).repeat(B, 1, 1).to(self.device) + + results['bg_color'] = bg_img + + bg_coords = self.bg_coords # [1, N, 2] + results['bg_coords'] = bg_coords + + # results['poses'] = convert_poses(poses) # [B, 6] + # results['poses_matrix'] = poses # [B, 4, 4] + results['poses'] = poses # [B, 4, 4] + + return results + + def dataloader(self): + + + # test with novel auds, then use its length + if self.auds is not None: + size = self.auds.shape[0] + # live stream test, use 2 * len(poses), so it naturally mirrors. + else: + size = 2 * self.poses.shape[0] + + loader = DataLoader(list(range(size)), batch_size=1, collate_fn=self.collate, shuffle=False, num_workers=0) + loader._data = self # an ugly fix... we need poses in trainer. 
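+        # (the trainer and GUI read poses, intrinsics, bg_img, auds and eye_area
+        # through loader._data rather than through the collated batches, which is
+        # why the dataset attaches itself to the loader here)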
+ + # do evaluate if has gt images and use self-driven setting + loader.has_gt = False + + return loader + + +class NeRFDataset: + def __init__(self, opt, device, type='train', downscale=1): + super().__init__() + + self.opt = opt + self.device = device + self.type = type # train, val, test + self.downscale = downscale + self.root_path = opt.path + self.preload = opt.preload # 0 = disk, 1 = cpu, 2 = gpu + self.scale = opt.scale # camera radius scale to make sure camera are inside the bounding box. + self.offset = opt.offset # camera offset + self.bound = opt.bound # bounding box half length, also used as the radius to random sample poses. + self.fp16 = opt.fp16 + + self.start_index = opt.data_range[0] + self.end_index = opt.data_range[1] + + self.training = self.type in ['train', 'all', 'trainval'] + self.num_rays = self.opt.num_rays if self.training else -1 + + # load nerf-compatible format data. + + # load all splits (train/valid/test) + if type == 'all': + transform_paths = glob.glob(os.path.join(self.root_path, '*.json')) + transform = None + for transform_path in transform_paths: + with open(transform_path, 'r') as f: + tmp_transform = json.load(f) + if transform is None: + transform = tmp_transform + else: + transform['frames'].extend(tmp_transform['frames']) + # load train and val split + elif type == 'trainval': + with open(os.path.join(self.root_path, f'transforms_train.json'), 'r') as f: + transform = json.load(f) + with open(os.path.join(self.root_path, f'transforms_val.json'), 'r') as f: + transform_val = json.load(f) + transform['frames'].extend(transform_val['frames']) + # only load one specified split + else: + # no test, use val as test + _split = 'val' if type == 'test' else type + with open(os.path.join(self.root_path, f'transforms_{_split}.json'), 'r') as f: + transform = json.load(f) + + # load image size + if 'h' in transform and 'w' in transform: + self.H = int(transform['h']) // downscale + self.W = int(transform['w']) // downscale + else: + self.H = int(transform['cy']) * 2 // downscale + self.W = int(transform['cx']) * 2 // downscale + + # read images + frames = transform["frames"] + + # use a slice of the dataset + if self.end_index == -1: # abuse... + self.end_index = len(frames) + + frames = frames[self.start_index:self.end_index] + + # use a subset of dataset. + if type == 'train': + if self.opt.part: + frames = frames[::10] # 1/10 frames + elif self.opt.part2: + frames = frames[:375] # first 15s + elif type == 'val': + frames = frames[:100] # first 100 frames for val + + print(f'[INFO] load {len(frames)} {type} frames.') + + # only load pre-calculated aud features when not live-streaming + if not self.opt.asr: + + # empty means the default self-driven extracted features. + if self.opt.aud == '': + if 'esperanto' in self.opt.asr_model: + aud_features = np.load(os.path.join(self.root_path, 'aud_eo.npy')) + elif 'deepspeech' in self.opt.asr_model: + aud_features = np.load(os.path.join(self.root_path, 'aud_ds.npy')) + else: + aud_features = np.load(os.path.join(self.root_path, 'aud.npy')) + # cross-driven extracted features. 
+ else: + aud_features = np.load(self.opt.aud) + + aud_features = torch.from_numpy(aud_features) + + # support both [N, 16] labels and [N, 16, K] logits + if len(aud_features.shape) == 3: + aud_features = aud_features.float().permute(0, 2, 1) # [N, 16, 29] --> [N, 29, 16] + + if self.opt.emb: + print(f'[INFO] argmax to aud features {aud_features.shape} for --emb mode') + aud_features = aud_features.argmax(1) # [N, 16] + + else: + assert self.opt.emb, "aud only provide labels, must use --emb" + aud_features = aud_features.long() + + print(f'[INFO] load {self.opt.aud} aud_features: {aud_features.shape}') + + # load action units + import pandas as pd + au_blink_info=pd.read_csv(os.path.join(self.root_path, 'au.csv')) + au_blink = au_blink_info[' AU45_r'].values + + self.torso_img = [] + self.images = [] + + self.poses = [] + self.exps = [] + + self.auds = [] + self.face_rect = [] + self.lhalf_rect = [] + self.lips_rect = [] + self.eye_area = [] + self.eye_rect = [] + + for f in tqdm.tqdm(frames, desc=f'Loading {type} data'): + + f_path = os.path.join(self.root_path, 'gt_imgs', str(f['img_id']) + '.jpg') + + if not os.path.exists(f_path): + print('[WARN]', f_path, 'NOT FOUND!') + continue + + pose = np.array(f['transform_matrix'], dtype=np.float32) # [4, 4] + pose = nerf_matrix_to_ngp(pose, scale=self.scale, offset=self.offset) + self.poses.append(pose) + + if self.preload > 0: + image = cv2.imread(f_path, cv2.IMREAD_UNCHANGED) # [H, W, 3] o [H, W, 4] + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = image.astype(np.float32) / 255 # [H, W, 3/4] + + self.images.append(image) + else: + self.images.append(f_path) + + # load frame-wise bg + + torso_img_path = os.path.join(self.root_path, 'torso_imgs', str(f['img_id']) + '.png') + + if self.preload > 0: + torso_img = cv2.imread(torso_img_path, cv2.IMREAD_UNCHANGED) # [H, W, 4] + torso_img = cv2.cvtColor(torso_img, cv2.COLOR_BGRA2RGBA) + torso_img = torso_img.astype(np.float32) / 255 # [H, W, 3/4] + + self.torso_img.append(torso_img) + else: + self.torso_img.append(torso_img_path) + + # find the corresponding audio to the image frame + if not self.opt.asr and self.opt.aud == '': + aud = aud_features[min(f['aud_id'], aud_features.shape[0] - 1)] # careful for the last frame... + self.auds.append(aud) + + # load lms and extract face + lms = np.loadtxt(os.path.join(self.root_path, 'ori_imgs', str(f['img_id']) + '.lms')) # [68, 2] + + lh_xmin, lh_xmax = int(lms[31:36, 1].min()), int(lms[:, 1].max()) # actually lower half area + xmin, xmax = int(lms[:, 1].min()), int(lms[:, 1].max()) + ymin, ymax = int(lms[:, 0].min()), int(lms[:, 0].max()) + self.face_rect.append([xmin, xmax, ymin, ymax]) + self.lhalf_rect.append([lh_xmin, lh_xmax, ymin, ymax]) + + if self.opt.exp_eye: + # eyes_left = slice(36, 42) + # eyes_right = slice(42, 48) + + # area_left = polygon_area(lms[eyes_left, 0], lms[eyes_left, 1]) + # area_right = polygon_area(lms[eyes_right, 0], lms[eyes_right, 1]) + + # # area percentage of two eyes of the whole image... 
+ # area = (area_left + area_right) / (self.H * self.W) * 100 + + # action units blink AU45 + area = au_blink[f['img_id']] + area = np.clip(area, 0, 2) / 2 + # area = area + np.random.rand() / 10 + self.eye_area.append(area) + + xmin, xmax = int(lms[36:48, 1].min()), int(lms[36:48, 1].max()) + ymin, ymax = int(lms[36:48, 0].min()), int(lms[36:48, 0].max()) + self.eye_rect.append([xmin, xmax, ymin, ymax]) + + if self.opt.finetune_lips: + lips = slice(48, 60) + xmin, xmax = int(lms[lips, 1].min()), int(lms[lips, 1].max()) + ymin, ymax = int(lms[lips, 0].min()), int(lms[lips, 0].max()) + + # padding to H == W + cx = (xmin + xmax) // 2 + cy = (ymin + ymax) // 2 + + l = max(xmax - xmin, ymax - ymin) // 2 + xmin = max(0, cx - l) + xmax = min(self.H, cx + l) + ymin = max(0, cy - l) + ymax = min(self.W, cy + l) + + self.lips_rect.append([xmin, xmax, ymin, ymax]) + + # load pre-extracted background image (should be the same size as training image...) + + if self.opt.bg_img == 'white': # special + bg_img = np.ones((self.H, self.W, 3), dtype=np.float32) + elif self.opt.bg_img == 'black': # special + bg_img = np.zeros((self.H, self.W, 3), dtype=np.float32) + else: # load from file + # default bg + if self.opt.bg_img == '': + self.opt.bg_img = os.path.join(self.root_path, 'bc.jpg') + bg_img = cv2.imread(self.opt.bg_img, cv2.IMREAD_UNCHANGED) # [H, W, 3] + if bg_img.shape[0] != self.H or bg_img.shape[1] != self.W: + bg_img = cv2.resize(bg_img, (self.W, self.H), interpolation=cv2.INTER_AREA) + bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB) + bg_img = bg_img.astype(np.float32) / 255 # [H, W, 3/4] + + self.bg_img = bg_img + + self.poses = np.stack(self.poses, axis=0) + + # smooth camera path... + if self.opt.smooth_path: + self.poses = smooth_camera_path(self.poses, self.opt.smooth_path_window) + + self.poses = torch.from_numpy(self.poses) # [N, 4, 4] + + if self.preload > 0: + self.images = torch.from_numpy(np.stack(self.images, axis=0)) # [N, H, W, C] + self.torso_img = torch.from_numpy(np.stack(self.torso_img, axis=0)) # [N, H, W, C] + else: + self.images = np.array(self.images) + self.torso_img = np.array(self.torso_img) + + if self.opt.asr: + # live streaming, no pre-calculated auds + self.auds = None + else: + # auds corresponding to images + if self.opt.aud == '': + self.auds = torch.stack(self.auds, dim=0) # [N, 32, 16] + # auds is novel, may have a different length with images + else: + self.auds = aud_features + + self.bg_img = torch.from_numpy(self.bg_img) + + if self.opt.exp_eye: + self.eye_area = np.array(self.eye_area, dtype=np.float32) # [N] + print(f'[INFO] eye_area: {self.eye_area.min()} - {self.eye_area.max()}') + + if self.opt.smooth_eye: + + # naive 5 window average + ori_eye = self.eye_area.copy() + for i in range(ori_eye.shape[0]): + start = max(0, i - 1) + end = min(ori_eye.shape[0], i + 2) + self.eye_area[i] = ori_eye[start:end].mean() + + self.eye_area = torch.from_numpy(self.eye_area).view(-1, 1) # [N, 1] + + + # calculate mean radius of all camera poses + self.radius = self.poses[:, :3, 3].norm(dim=-1).mean(0).item() + #print(f'[INFO] dataset camera poses: radius = {self.radius:.4f}, bound = {self.bound}') + + + # [debug] uncomment to view all training poses. + # visualize_poses(self.poses.numpy()) + + # [debug] uncomment to view examples of randomly generated poses. 
+ # visualize_poses(rand_poses(100, self.device, radius=self.radius).cpu().numpy()) + + if self.preload > 1: + self.poses = self.poses.to(self.device) + + if self.auds is not None: + self.auds = self.auds.to(self.device) + + self.bg_img = self.bg_img.to(torch.half).to(self.device) + + self.torso_img = self.torso_img.to(torch.half).to(self.device) + self.images = self.images.to(torch.half).to(self.device) + + if self.opt.exp_eye: + self.eye_area = self.eye_area.to(self.device) + + # load intrinsics + if 'focal_len' in transform: + fl_x = fl_y = transform['focal_len'] + elif 'fl_x' in transform or 'fl_y' in transform: + fl_x = (transform['fl_x'] if 'fl_x' in transform else transform['fl_y']) / downscale + fl_y = (transform['fl_y'] if 'fl_y' in transform else transform['fl_x']) / downscale + elif 'camera_angle_x' in transform or 'camera_angle_y' in transform: + # blender, assert in radians. already downscaled since we use H/W + fl_x = self.W / (2 * np.tan(transform['camera_angle_x'] / 2)) if 'camera_angle_x' in transform else None + fl_y = self.H / (2 * np.tan(transform['camera_angle_y'] / 2)) if 'camera_angle_y' in transform else None + if fl_x is None: fl_x = fl_y + if fl_y is None: fl_y = fl_x + else: + raise RuntimeError('Failed to load focal length, please check the transforms.json!') + + cx = (transform['cx'] / downscale) if 'cx' in transform else (self.W / 2) + cy = (transform['cy'] / downscale) if 'cy' in transform else (self.H / 2) + + self.intrinsics = np.array([fl_x, fl_y, cx, cy]) + + # directly build the coordinate meshgrid in [-1, 1]^2 + self.bg_coords = get_bg_coords(self.H, self.W, self.device) # [1, H*W, 2] in [-1, 1] + + + def mirror_index(self, index): + size = self.poses.shape[0] + turn = index // size + res = index % size + if turn % 2 == 0: + return res + else: + return size - res - 1 + + + def collate(self, index): + + B = len(index) # a list of length 1 + # assert B == 1 + + results = {} + + # audio use the original index + if self.auds is not None: + auds = get_audio_features(self.auds, self.opt.att, index[0]).to(self.device) + results['auds'] = auds + + # head pose and bg image may mirror (replay --> <-- --> <--). + index[0] = self.mirror_index(index[0]) + + poses = self.poses[index].to(self.device) # [B, 4, 4] + + if self.training and self.opt.finetune_lips: + rect = self.lips_rect[index[0]] + results['rect'] = rect + rays = get_rays(poses, self.intrinsics, self.H, self.W, -1, rect=rect) + else: + rays = get_rays(poses, self.intrinsics, self.H, self.W, self.num_rays, self.opt.patch_size) + + results['index'] = index # for ind. 
code + results['H'] = self.H + results['W'] = self.W + results['rays_o'] = rays['rays_o'] + results['rays_d'] = rays['rays_d'] + + # get a mask for rays inside rect_face + if self.training: + xmin, xmax, ymin, ymax = self.face_rect[index[0]] + face_mask = (rays['j'] >= xmin) & (rays['j'] < xmax) & (rays['i'] >= ymin) & (rays['i'] < ymax) # [B, N] + results['face_mask'] = face_mask + + xmin, xmax, ymin, ymax = self.lhalf_rect[index[0]] + lhalf_mask = (rays['j'] >= xmin) & (rays['j'] < xmax) & (rays['i'] >= ymin) & (rays['i'] < ymax) # [B, N] + results['lhalf_mask'] = lhalf_mask + + if self.opt.exp_eye: + results['eye'] = self.eye_area[index].to(self.device) # [1] + if self.training: + results['eye'] += (np.random.rand()-0.5) / 10 + xmin, xmax, ymin, ymax = self.eye_rect[index[0]] + eye_mask = (rays['j'] >= xmin) & (rays['j'] < xmax) & (rays['i'] >= ymin) & (rays['i'] < ymax) # [B, N] + results['eye_mask'] = eye_mask + + else: + results['eye'] = None + + # load bg + bg_torso_img = self.torso_img[index] + if self.preload == 0: # on the fly loading + bg_torso_img = cv2.imread(bg_torso_img[0], cv2.IMREAD_UNCHANGED) # [H, W, 4] + bg_torso_img = cv2.cvtColor(bg_torso_img, cv2.COLOR_BGRA2RGBA) + bg_torso_img = bg_torso_img.astype(np.float32) / 255 # [H, W, 3/4] + bg_torso_img = torch.from_numpy(bg_torso_img).unsqueeze(0) + bg_torso_img = bg_torso_img[..., :3] * bg_torso_img[..., 3:] + self.bg_img * (1 - bg_torso_img[..., 3:]) + bg_torso_img = bg_torso_img.view(B, -1, 3).to(self.device) + + if not self.opt.torso: + bg_img = bg_torso_img + else: + bg_img = self.bg_img.view(1, -1, 3).repeat(B, 1, 1).to(self.device) + + if self.training: + bg_img = torch.gather(bg_img, 1, torch.stack(3 * [rays['inds']], -1)) # [B, N, 3] + + results['bg_color'] = bg_img + + if self.opt.torso and self.training: + bg_torso_img = torch.gather(bg_torso_img, 1, torch.stack(3 * [rays['inds']], -1)) # [B, N, 3] + results['bg_torso_color'] = bg_torso_img + + images = self.images[index] # [B, H, W, 3/4] + if self.preload == 0: + images = cv2.imread(images[0], cv2.IMREAD_UNCHANGED) # [H, W, 3] + images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB) + images = images.astype(np.float32) / 255 # [H, W, 3] + images = torch.from_numpy(images).unsqueeze(0) + images = images.to(self.device) + + if self.training: + C = images.shape[-1] + images = torch.gather(images.view(B, -1, C), 1, torch.stack(C * [rays['inds']], -1)) # [B, N, 3/4] + + results['images'] = images + + if self.training: + bg_coords = torch.gather(self.bg_coords, 1, torch.stack(2 * [rays['inds']], -1)) # [1, N, 2] + else: + bg_coords = self.bg_coords # [1, N, 2] + + results['bg_coords'] = bg_coords + + # results['poses'] = convert_poses(poses) # [B, 6] + # results['poses_matrix'] = poses # [B, 4, 4] + results['poses'] = poses # [B, 4, 4] + + return results + + def dataloader(self): + + if self.training: + # training len(poses) == len(auds) + size = self.poses.shape[0] + else: + # test with novel auds, then use its length + if self.auds is not None: + size = self.auds.shape[0] + # live stream test, use 2 * len(poses), so it naturally mirrors. + else: + size = 2 * self.poses.shape[0] + + loader = DataLoader(list(range(size)), batch_size=1, collate_fn=self.collate, shuffle=self.training, num_workers=0) + loader._data = self # an ugly fix... we need poses in trainer. 
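The doubling of `size` above leans on `mirror_index`, defined earlier in this class; below is a minimal illustrative sketch (toy size of 4 poses, standalone code) of the resulting ping-pong ordering:

def mirror_index(index, size):
    turn, res = divmod(index, size)
    return res if turn % 2 == 0 else size - res - 1

# with 4 poses, test indices 0..7 replay them as 0,1,2,3,3,2,1,0 (forward, then backward)
assert [mirror_index(i, 4) for i in range(8)] == [0, 1, 2, 3, 3, 2, 1, 0]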
+ + # do evaluate if has gt images and use self-driven setting + loader.has_gt = (self.opt.aud == '') + + return loader \ No newline at end of file diff --git a/nerf_triplane/renderer.py b/nerf_triplane/renderer.py new file mode 100644 index 0000000..3dfe8f2 --- /dev/null +++ b/nerf_triplane/renderer.py @@ -0,0 +1,700 @@ +import math +import trimesh +import numpy as np +import random + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import raymarching +from .utils import custom_meshgrid, get_audio_features, euler_angles_to_matrix, convert_poses + +def sample_pdf(bins, weights, n_samples, det=False): + # This implementation is from NeRF + # bins: [B, T], old_z_vals + # weights: [B, T - 1], bin weights. + # return: [B, n_samples], new_z_vals + + # Get pdf + weights = weights + 1e-5 # prevent nans + pdf = weights / torch.sum(weights, -1, keepdim=True) + cdf = torch.cumsum(pdf, -1) + cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], -1) + # Take uniform samples + if det: + u = torch.linspace(0. + 0.5 / n_samples, 1. - 0.5 / n_samples, steps=n_samples).to(weights.device) + u = u.expand(list(cdf.shape[:-1]) + [n_samples]) + else: + u = torch.rand(list(cdf.shape[:-1]) + [n_samples]).to(weights.device) + + # Invert CDF + u = u.contiguous() + inds = torch.searchsorted(cdf, u, right=True) + below = torch.max(torch.zeros_like(inds - 1), inds - 1) + above = torch.min((cdf.shape[-1] - 1) * torch.ones_like(inds), inds) + inds_g = torch.stack([below, above], -1) # (B, n_samples, 2) + + matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]] + cdf_g = torch.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g) + bins_g = torch.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g) + + denom = (cdf_g[..., 1] - cdf_g[..., 0]) + denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom) + t = (u - cdf_g[..., 0]) / denom + samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0]) + + return samples + + +def plot_pointcloud(pc, color=None): + # pc: [N, 3] + # color: [N, 3/4] + print('[visualize points]', pc.shape, pc.dtype, pc.min(0), pc.max(0)) + pc = trimesh.PointCloud(pc, color) + # axis + axes = trimesh.creation.axis(axis_length=4) + # sphere + sphere = trimesh.creation.icosphere(radius=1) + trimesh.Scene([pc, axes, sphere]).show() + + +class NeRFRenderer(nn.Module): + def __init__(self, opt): + + super().__init__() + + self.opt = opt + self.bound = opt.bound + self.cascade = 1 + math.ceil(math.log2(opt.bound)) + self.grid_size = 128 + self.density_scale = 1 + + self.min_near = opt.min_near + self.density_thresh = opt.density_thresh + self.density_thresh_torso = opt.density_thresh_torso + + self.exp_eye = opt.exp_eye + self.test_train = opt.test_train + self.smooth_lips = opt.smooth_lips + + self.torso = opt.torso + self.cuda_ray = opt.cuda_ray + + # prepare aabb with a 6D tensor (xmin, ymin, zmin, xmax, ymax, zmax) + # NOTE: aabb (can be rectangular) is only used to generate points, we still rely on bound (always cubic) to calculate density grid and hashing. 
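As a concrete illustration of the note above (a standalone sketch with a hypothetical `opt.bound` of 2): the cascade count grows logarithmically with the bound, while the train/infer AABBs below halve the y extent.

import math
import torch

bound = 2                                                   # hypothetical opt.bound
cascade = 1 + math.ceil(math.log2(bound))                   # -> 2 cascades, as computed above
aabb = torch.FloatTensor([-bound, -bound / 2, -bound,
                           bound,  bound / 2,  bound])      # y extent is half of x/z
print(cascade, aabb.tolist())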
+ aabb_train = torch.FloatTensor([-opt.bound, -opt.bound/2, -opt.bound, opt.bound, opt.bound/2, opt.bound]) + aabb_infer = aabb_train.clone() + self.register_buffer('aabb_train', aabb_train) + self.register_buffer('aabb_infer', aabb_infer) + + # individual codes + self.individual_num = opt.ind_num + + self.individual_dim = opt.ind_dim + if self.individual_dim > 0: + self.individual_codes = nn.Parameter(torch.randn(self.individual_num, self.individual_dim) * 0.1) + + if self.torso: + self.individual_dim_torso = opt.ind_dim_torso + if self.individual_dim_torso > 0: + self.individual_codes_torso = nn.Parameter(torch.randn(self.individual_num, self.individual_dim_torso) * 0.1) + + # optimize camera pose + self.train_camera = self.opt.train_camera + if self.train_camera: + self.camera_dR = nn.Parameter(torch.zeros(self.individual_num, 3)) # euler angle + self.camera_dT = nn.Parameter(torch.zeros(self.individual_num, 3)) # xyz offset + + # extra state for cuda raymarching + + # 3D head density grid + density_grid = torch.zeros([self.cascade, self.grid_size ** 3]) # [CAS, H * H * H] + density_bitfield = torch.zeros(self.cascade * self.grid_size ** 3 // 8, dtype=torch.uint8) # [CAS * H * H * H // 8] + self.register_buffer('density_grid', density_grid) + self.register_buffer('density_bitfield', density_bitfield) + self.mean_density = 0 + self.iter_density = 0 + + # 2D torso density grid + if self.torso: + density_grid_torso = torch.zeros([self.grid_size ** 2]) # [H * H] + self.register_buffer('density_grid_torso', density_grid_torso) + self.mean_density_torso = 0 + + # step counter + step_counter = torch.zeros(16, 2, dtype=torch.int32) # 16 is hardcoded for averaging... + self.register_buffer('step_counter', step_counter) + self.mean_count = 0 + self.local_step = 0 + + # decay for enc_a + if self.smooth_lips: + self.enc_a = None + + def forward(self, x, d): + raise NotImplementedError() + + # separated density and color query (can accelerate non-cuda-ray mode.) + def density(self, x): + raise NotImplementedError() + + def color(self, x, d, mask=None, **kwargs): + raise NotImplementedError() + + def reset_extra_state(self): + if not self.cuda_ray: + return + # density grid + self.density_grid.zero_() + self.mean_density = 0 + self.iter_density = 0 + # step counter + self.step_counter.zero_() + self.mean_count = 0 + self.local_step = 0 + + + def run_cuda(self, rays_o, rays_d, auds, bg_coords, poses, eye=None, index=0, dt_gamma=0, bg_color=None, perturb=False, force_all_rays=False, max_steps=1024, T_thresh=1e-4, **kwargs): + # rays_o, rays_d: [B, N, 3], assumes B == 1 + # auds: [B, 16] + # index: [B] + # return: image: [B, N, 3], depth: [B, N] + + prefix = rays_o.shape[:-1] + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + bg_coords = bg_coords.contiguous().view(-1, 2) + + # only add camera offset at training! 
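+        # (the learned per-frame correction below: camera_dT is an xyz offset added to the ray
+        #  origins, and camera_dR holds per-frame Euler angles in degrees, which are converted
+        #  to a rotation matrix and applied to the ray directions)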
+ if self.train_camera and (self.training or self.test_train): + dT = self.camera_dT[index] # [1, 3] + dR = euler_angles_to_matrix(self.camera_dR[index] / 180 * np.pi + 1e-8).squeeze(0) # [1, 3] --> [3, 3] + + rays_o = rays_o + dT + rays_d = rays_d @ dR + + N = rays_o.shape[0] # N = B * N, in fact + device = rays_o.device + + results = {} + + # pre-calculate near far + nears, fars = raymarching.near_far_from_aabb(rays_o, rays_d, self.aabb_train if self.training else self.aabb_infer, self.min_near) + nears = nears.detach() + fars = fars.detach() + + # encode audio + enc_a = self.encode_audio(auds) # [1, 64] + + if enc_a is not None and self.smooth_lips: + if self.enc_a is not None: + _lambda = 0.35 + enc_a = _lambda * self.enc_a + (1 - _lambda) * enc_a + self.enc_a = enc_a + + + if self.individual_dim > 0: + if self.training: + ind_code = self.individual_codes[index] + # use a fixed ind code for the unknown test data. + else: + ind_code = self.individual_codes[0] + else: + ind_code = None + + if self.training: + # setup counter + counter = self.step_counter[self.local_step % 16] + counter.zero_() # set to 0 + self.local_step += 1 + + xyzs, dirs, deltas, rays = raymarching.march_rays_train(rays_o, rays_d, self.bound, self.density_bitfield, self.cascade, self.grid_size, nears, fars, counter, self.mean_count, perturb, 128, force_all_rays, dt_gamma, max_steps) + sigmas, rgbs, amb_aud, amb_eye, uncertainty = self(xyzs, dirs, enc_a, ind_code, eye) + sigmas = self.density_scale * sigmas + + #print(f'valid RGB query ratio: {mask.sum().item() / mask.shape[0]} (total = {mask.sum().item()})') + + # weights_sum, ambient_sum, uncertainty_sum, depth, image = raymarching.composite_rays_train_uncertainty(sigmas, rgbs, ambient.abs().sum(-1), uncertainty, deltas, rays) + weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, depth, image = raymarching.composite_rays_train_triplane(sigmas, rgbs, amb_aud.abs().sum(-1), amb_eye.abs().sum(-1), uncertainty, deltas, rays) + + # for training only + results['weights_sum'] = weights_sum + results['ambient_aud'] = amb_aud_sum + results['ambient_eye'] = amb_eye_sum + results['uncertainty'] = uncertainty_sum + + results['rays'] = xyzs, dirs, enc_a, ind_code, eye + + else: + + dtype = torch.float32 + + weights_sum = torch.zeros(N, dtype=dtype, device=device) + depth = torch.zeros(N, dtype=dtype, device=device) + image = torch.zeros(N, 3, dtype=dtype, device=device) + amb_aud_sum = torch.zeros(N, dtype=dtype, device=device) + amb_eye_sum = torch.zeros(N, dtype=dtype, device=device) + uncertainty_sum = torch.zeros(N, dtype=dtype, device=device) + + n_alive = N + rays_alive = torch.arange(n_alive, dtype=torch.int32, device=device) # [N] + rays_t = nears.clone() # [N] + + step = 0 + + while step < max_steps: + + # count alive rays + n_alive = rays_alive.shape[0] + + # exit loop + if n_alive <= 0: + break + + # decide compact_steps + n_step = max(min(N // n_alive, 8), 1) + + xyzs, dirs, deltas = raymarching.march_rays(n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, self.bound, self.density_bitfield, self.cascade, self.grid_size, nears, fars, 128, perturb if step == 0 else False, dt_gamma, max_steps) + + sigmas, rgbs, ambients_aud, ambients_eye, uncertainties = self(xyzs, dirs, enc_a, ind_code, eye) + sigmas = self.density_scale * sigmas + + # raymarching.composite_rays_uncertainty(n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, uncertainties, weights_sum, depth, image, ambient_sum, uncertainty_sum, T_thresh) + 
raymarching.composite_rays_triplane(n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, ambients_aud, ambients_eye, uncertainties, weights_sum, depth, image, amb_aud_sum, amb_eye_sum, uncertainty_sum, T_thresh) + + rays_alive = rays_alive[rays_alive >= 0] + + # print(f'step = {step}, n_step = {n_step}, n_alive = {n_alive}, xyzs: {xyzs.shape}') + + step += n_step + + torso_results = self.run_torso(rays_o, bg_coords, poses, index, bg_color) + bg_color = torso_results['bg_color'] + + image = image + (1 - weights_sum).unsqueeze(-1) * bg_color + image = image.view(*prefix, 3) + image = image.clamp(0, 1) + + depth = torch.clamp(depth - nears, min=0) / (fars - nears) + depth = depth.view(*prefix) + + amb_aud_sum = amb_aud_sum.view(*prefix) + amb_eye_sum = amb_eye_sum.view(*prefix) + + results['depth'] = depth + results['image'] = image # head_image if train, else com_image + results['ambient_aud'] = amb_aud_sum + results['ambient_eye'] = amb_eye_sum + results['uncertainty'] = uncertainty_sum + + return results + + + def run_torso(self, rays_o, bg_coords, poses, index=0, bg_color=None, **kwargs): + # rays_o, rays_d: [B, N, 3], assumes B == 1 + # auds: [B, 16] + # index: [B] + # return: image: [B, N, 3], depth: [B, N] + + rays_o = rays_o.contiguous().view(-1, 3) + bg_coords = bg_coords.contiguous().view(-1, 2) + + N = rays_o.shape[0] # N = B * N, in fact + device = rays_o.device + + results = {} + + # background + if bg_color is None: + bg_color = 1 + + # first mix torso with background + if self.torso: + # torso ind code + if self.individual_dim_torso > 0: + if self.training: + ind_code_torso = self.individual_codes_torso[index] + # use a fixed ind code for the unknown test data. + else: + ind_code_torso = self.individual_codes_torso[0] + else: + ind_code_torso = None + + # 2D density grid for acceleration... + density_thresh_torso = min(self.density_thresh_torso, self.mean_density_torso) + occupancy = F.grid_sample(self.density_grid_torso.view(1, 1, self.grid_size, self.grid_size), bg_coords.view(1, -1, 1, 2), align_corners=True).view(-1) + mask = occupancy > density_thresh_torso + + # masked query of torso + torso_alpha = torch.zeros([N, 1], device=device) + torso_color = torch.zeros([N, 3], device=device) + + if mask.any(): + torso_alpha_mask, torso_color_mask, deform = self.forward_torso(bg_coords[mask], poses, ind_code_torso) + + torso_alpha[mask] = torso_alpha_mask.float() + torso_color[mask] = torso_color_mask.float() + + results['deform'] = deform + + # first mix torso with background + + bg_color = torso_color * torso_alpha + bg_color * (1 - torso_alpha) + + results['torso_alpha'] = torso_alpha + results['torso_color'] = bg_color + + # print(torso_alpha.shape, torso_alpha.max().item(), torso_alpha.min().item()) + + results['bg_color'] = bg_color + + return results + + + @torch.no_grad() + def mark_untrained_grid(self, poses, intrinsic, S=64): + # poses: [B, 4, 4] + # intrinsic: [3, 3] + + if not self.cuda_ray: + return + + if isinstance(poses, np.ndarray): + poses = torch.from_numpy(poses) + + B = poses.shape[0] + + fx, fy, cx, cy = intrinsic + + X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Z = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + + count = torch.zeros_like(self.density_grid) + poses = poses.to(count.device) + + # 5-level loop, forgive me... 
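+        # (the five levels below: grid chunks along X, Y and Z, the cascade index, and batches of
+        #  training poses; cells that no training camera ever sees keep count == 0 and their
+        #  density is set to -1, so they stay below the threshold when packed into the bitfield)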
+ + for xs in X: + for ys in Y: + for zs in Z: + + # construct points + xx, yy, zz = custom_meshgrid(xs, ys, zs) + coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [N, 3], in [0, 128) + indices = raymarching.morton3D(coords).long() # [N] + world_xyzs = (2 * coords.float() / (self.grid_size - 1) - 1).unsqueeze(0) # [1, N, 3] in [-1, 1] + + # cascading + for cas in range(self.cascade): + bound = min(2 ** cas, self.bound) + half_grid_size = bound / self.grid_size + # scale to current cascade's resolution + cas_world_xyzs = world_xyzs * (bound - half_grid_size) + + # split batch to avoid OOM + head = 0 + while head < B: + tail = min(head + S, B) + + # world2cam transform (poses is c2w, so we need to transpose it. Another transpose is needed for batched matmul, so the final form is without transpose.) + cam_xyzs = cas_world_xyzs - poses[head:tail, :3, 3].unsqueeze(1) + cam_xyzs = cam_xyzs @ poses[head:tail, :3, :3] # [S, N, 3] + + # query if point is covered by any camera + mask_z = cam_xyzs[:, :, 2] > 0 # [S, N] + mask_x = torch.abs(cam_xyzs[:, :, 0]) < cx / fx * cam_xyzs[:, :, 2] + half_grid_size * 2 + mask_y = torch.abs(cam_xyzs[:, :, 1]) < cy / fy * cam_xyzs[:, :, 2] + half_grid_size * 2 + mask = (mask_z & mask_x & mask_y).sum(0).reshape(-1) # [N] + + # update count + count[cas, indices] += mask + head += S + + # mark untrained grid as -1 + self.density_grid[count == 0] = -1 + + #print(f'[mark untrained grid] {(count == 0).sum()} from {resolution ** 3 * self.cascade}') + + @torch.no_grad() + def update_extra_state(self, decay=0.95, S=128): + # call before each epoch to update extra states. + + if not self.cuda_ray: + return + + # use random auds (different expressions should have similar density grid...) + rand_idx = random.randint(0, self.aud_features.shape[0] - 1) + auds = get_audio_features(self.aud_features, self.att, rand_idx).to(self.density_bitfield.device) + + # encode audio + enc_a = self.encode_audio(auds) + + ### update density grid + if not self.torso: # forbid updating head if is training torso... + + tmp_grid = torch.zeros_like(self.density_grid) + + # use a random eye area based on training dataset's statistics... 
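+            # (eye_area is the AU45-based blink value prepared by the data provider, clipped to
+            #  [0, 2] and halved there, so the randomly picked value lies in [0, 1])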
+ if self.exp_eye: + eye = self.eye_area[[rand_idx]].to(self.density_bitfield.device) # [1, 1] + else: + eye = None + + # full update + X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Z = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + + for xs in X: + for ys in Y: + for zs in Z: + + # construct points + xx, yy, zz = custom_meshgrid(xs, ys, zs) + coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [N, 3], in [0, 128) + indices = raymarching.morton3D(coords).long() # [N] + xyzs = 2 * coords.float() / (self.grid_size - 1) - 1 # [N, 3] in [-1, 1] + + # cascading + for cas in range(self.cascade): + bound = min(2 ** cas, self.bound) + half_grid_size = bound / self.grid_size + # scale to current cascade's resolution + cas_xyzs = xyzs * (bound - half_grid_size) + # add noise in [-hgs, hgs] + cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_size + # query density + sigmas = self.density(cas_xyzs, enc_a, eye)['sigma'].reshape(-1).detach().to(tmp_grid.dtype) + sigmas *= self.density_scale + # assign + tmp_grid[cas, indices] = sigmas + + # dilate the density_grid (less aggressive culling) + tmp_grid = raymarching.morton3D_dilation(tmp_grid) + + # ema update + valid_mask = (self.density_grid >= 0) & (tmp_grid >= 0) + self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask]) + self.mean_density = torch.mean(self.density_grid.clamp(min=0)).item() # -1 non-training regions are viewed as 0 density. + self.iter_density += 1 + + # convert to bitfield + density_thresh = min(self.mean_density, self.density_thresh) + self.density_bitfield = raymarching.packbits(self.density_grid, density_thresh, self.density_bitfield) + + ### update torso density grid + if self.torso: + tmp_grid_torso = torch.zeros_like(self.density_grid_torso) + + # random pose, random ind_code + rand_idx = random.randint(0, self.poses.shape[0] - 1) + # pose = convert_poses(self.poses[[rand_idx]]).to(self.density_bitfield.device) + pose = self.poses[[rand_idx]].to(self.density_bitfield.device) + + if self.opt.ind_dim_torso > 0: + ind_code = self.individual_codes_torso[[rand_idx]] + else: + ind_code = None + + X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + + half_grid_size = 1 / self.grid_size + + for xs in X: + for ys in Y: + xx, yy = custom_meshgrid(xs, ys) + coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1)], dim=-1) # [N, 2], in [0, 128) + indices = (coords[:, 1] * self.grid_size + coords[:, 0]).long() # NOTE: xy transposed! 
+ xys = 2 * coords.float() / (self.grid_size - 1) - 1 # [N, 2] in [-1, 1] + xys = xys * (1 - half_grid_size) + # add noise in [-hgs, hgs] + xys += (torch.rand_like(xys) * 2 - 1) * half_grid_size + # query density + alphas, _, _ = self.forward_torso(xys, pose, ind_code) # [N, 1] + + # assign + tmp_grid_torso[indices] = alphas.squeeze(1).float() + + # dilate + tmp_grid_torso = tmp_grid_torso.view(1, 1, self.grid_size, self.grid_size) + # tmp_grid_torso = F.max_pool2d(tmp_grid_torso, kernel_size=3, stride=1, padding=1) + tmp_grid_torso = F.max_pool2d(tmp_grid_torso, kernel_size=5, stride=1, padding=2) + tmp_grid_torso = tmp_grid_torso.view(-1) + + self.density_grid_torso = torch.maximum(self.density_grid_torso * decay, tmp_grid_torso) + self.mean_density_torso = torch.mean(self.density_grid_torso).item() + + # density_thresh_torso = min(self.density_thresh_torso, self.mean_density_torso) + # print(f'[density grid torso] min={self.density_grid_torso.min().item():.4f}, max={self.density_grid_torso.max().item():.4f}, mean={self.mean_density_torso:.4f}, occ_rate={(self.density_grid_torso > density_thresh_torso).sum() / (128**2):.3f}') + + ### update step counter + total_step = min(16, self.local_step) + if total_step > 0: + self.mean_count = int(self.step_counter[:total_step, 0].sum().item() / total_step) + self.local_step = 0 + + #print(f'[density grid] min={self.density_grid.min().item():.4f}, max={self.density_grid.max().item():.4f}, mean={self.mean_density:.4f}, occ_rate={(self.density_grid > 0.01).sum() / (128**3 * self.cascade):.3f} | [step counter] mean={self.mean_count}') + + + @torch.no_grad() + def get_audio_grid(self, S=128): + # call before each epoch to update extra states. + + if not self.cuda_ray: + return + + # use random auds (different expressions should have similar density grid...) + rand_idx = random.randint(0, self.aud_features.shape[0] - 1) + auds = get_audio_features(self.aud_features, self.att, rand_idx).to(self.density_bitfield.device) + + # encode audio + enc_a = self.encode_audio(auds) + tmp_grid = torch.zeros_like(self.density_grid) + + # use a random eye area based on training dataset's statistics... 
+ if self.exp_eye: + eye = self.eye_area[[rand_idx]].to(self.density_bitfield.device) # [1, 1] + else: + eye = None + + # full update + X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Z = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + + for xs in X: + for ys in Y: + for zs in Z: + + # construct points + xx, yy, zz = custom_meshgrid(xs, ys, zs) + coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [N, 3], in [0, 128) + indices = raymarching.morton3D(coords).long() # [N] + xyzs = 2 * coords.float() / (self.grid_size - 1) - 1 # [N, 3] in [-1, 1] + + # cascading + for cas in range(self.cascade): + bound = min(2 ** cas, self.bound) + half_grid_size = bound / self.grid_size + # scale to current cascade's resolution + cas_xyzs = xyzs * (bound - half_grid_size) + # add noise in [-hgs, hgs] + cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_size + # query density + aud_norms = self.density(cas_xyzs.to(tmp_grid.dtype), enc_a, eye)['ambient_aud'].reshape(-1).detach().to(tmp_grid.dtype) + # assign + tmp_grid[cas, indices] = aud_norms + + # dilate the density_grid (less aggressive culling) + tmp_grid = raymarching.morton3D_dilation(tmp_grid) + return tmp_grid + # # ema update + # valid_mask = (self.density_grid >= 0) & (tmp_grid >= 0) + # self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask]) + + + @torch.no_grad() + def get_eye_grid(self, S=128): + # call before each epoch to update extra states. + + if not self.cuda_ray: + return + + # use random auds (different expressions should have similar density grid...) + rand_idx = random.randint(0, self.aud_features.shape[0] - 1) + auds = get_audio_features(self.aud_features, self.att, rand_idx).to(self.density_bitfield.device) + + # encode audio + enc_a = self.encode_audio(auds) + tmp_grid = torch.zeros_like(self.density_grid) + + # use a random eye area based on training dataset's statistics... 
+ if self.exp_eye: + eye = self.eye_area[[rand_idx]].to(self.density_bitfield.device) # [1, 1] + else: + eye = None + + # full update + X = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Y = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + Z = torch.arange(self.grid_size, dtype=torch.int32, device=self.density_bitfield.device).split(S) + + for xs in X: + for ys in Y: + for zs in Z: + + # construct points + xx, yy, zz = custom_meshgrid(xs, ys, zs) + coords = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [N, 3], in [0, 128) + indices = raymarching.morton3D(coords).long() # [N] + xyzs = 2 * coords.float() / (self.grid_size - 1) - 1 # [N, 3] in [-1, 1] + + # cascading + for cas in range(self.cascade): + bound = min(2 ** cas, self.bound) + half_grid_size = bound / self.grid_size + # scale to current cascade's resolution + cas_xyzs = xyzs * (bound - half_grid_size) + # add noise in [-hgs, hgs] + cas_xyzs += (torch.rand_like(cas_xyzs) * 2 - 1) * half_grid_size + # query density + eye_norms = self.density(cas_xyzs.to(tmp_grid.dtype), enc_a, eye)['ambient_eye'].reshape(-1).detach().to(tmp_grid.dtype) + # assign + tmp_grid[cas, indices] = eye_norms + + # dilate the density_grid (less aggressive culling) + tmp_grid = raymarching.morton3D_dilation(tmp_grid) + return tmp_grid + # # ema update + # valid_mask = (self.density_grid >= 0) & (tmp_grid >= 0) + # self.density_grid[valid_mask] = torch.maximum(self.density_grid[valid_mask] * decay, tmp_grid[valid_mask]) + + + + def render(self, rays_o, rays_d, auds, bg_coords, poses, staged=False, max_ray_batch=4096, **kwargs): + # rays_o, rays_d: [B, N, 3], assumes B == 1 + # auds: [B, 29, 16] + # eye: [B, 1] + # bg_coords: [1, N, 2] + # return: pred_rgb: [B, N, 3] + + _run = self.run_cuda + + B, N = rays_o.shape[:2] + device = rays_o.device + + # never stage when cuda_ray + if staged and not self.cuda_ray: + # not used + raise NotImplementedError + + else: + results = _run(rays_o, rays_d, auds, bg_coords, poses, **kwargs) + + return results + + + def render_torso(self, rays_o, rays_d, auds, bg_coords, poses, staged=False, max_ray_batch=4096, **kwargs): + # rays_o, rays_d: [B, N, 3], assumes B == 1 + # auds: [B, 29, 16] + # eye: [B, 1] + # bg_coords: [1, N, 2] + # return: pred_rgb: [B, N, 3] + + _run = self.run_torso + + B, N = rays_o.shape[:2] + device = rays_o.device + + # never stage when cuda_ray + if staged and not self.cuda_ray: + # not used + raise NotImplementedError + + else: + results = _run(rays_o, bg_coords, poses, **kwargs) + + return results \ No newline at end of file diff --git a/nerf_triplane/utils.py b/nerf_triplane/utils.py new file mode 100644 index 0000000..d9304d3 --- /dev/null +++ b/nerf_triplane/utils.py @@ -0,0 +1,1514 @@ +import os +import glob +import tqdm +import math +import random +import warnings +import tensorboardX + +import numpy as np +import pandas as pd + +import time +from datetime import datetime + +import cv2 +import matplotlib.pyplot as plt + +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import torch.distributed as dist +from torch.utils.data import Dataset, DataLoader + +import trimesh +import mcubes +from rich.console import Console +from torch_ema import ExponentialMovingAverage + +from packaging import version as pver +import imageio +import lpips + +def custom_meshgrid(*args): + # ref: 
https://pytorch.org/docs/stable/generated/torch.meshgrid.html?highlight=meshgrid#torch.meshgrid + if pver.parse(torch.__version__) < pver.parse('1.10'): + return torch.meshgrid(*args) + else: + return torch.meshgrid(*args, indexing='ij') + + +def get_audio_features(features, att_mode, index): + if att_mode == 0: + return features[[index]] + elif att_mode == 1: + left = index - 8 + pad_left = 0 + if left < 0: + pad_left = -left + left = 0 + auds = features[left:index] + if pad_left > 0: + # pad may be longer than auds, so do not use zeros_like + auds = torch.cat([torch.zeros(pad_left, *auds.shape[1:], device=auds.device, dtype=auds.dtype), auds], dim=0) + return auds + elif att_mode == 2: + left = index - 4 + right = index + 4 + pad_left = 0 + pad_right = 0 + if left < 0: + pad_left = -left + left = 0 + if right > features.shape[0]: + pad_right = right - features.shape[0] + right = features.shape[0] + auds = features[left:right] + if pad_left > 0: + auds = torch.cat([torch.zeros_like(auds[:pad_left]), auds], dim=0) + if pad_right > 0: + auds = torch.cat([auds, torch.zeros_like(auds[:pad_right])], dim=0) # [8, 16] + return auds + else: + raise NotImplementedError(f'wrong att_mode: {att_mode}') + + +@torch.jit.script +def linear_to_srgb(x): + return torch.where(x < 0.0031308, 12.92 * x, 1.055 * x ** 0.41666 - 0.055) + + +@torch.jit.script +def srgb_to_linear(x): + return torch.where(x < 0.04045, x / 12.92, ((x + 0.055) / 1.055) ** 2.4) + +# copied from pytorch3d +def _angle_from_tan( + axis: str, other_axis: str, data, horizontal: bool, tait_bryan: bool +) -> torch.Tensor: + """ + Extract the first or third Euler angle from the two members of + the matrix which are positive constant times its sine and cosine. + + Args: + axis: Axis label "X" or "Y or "Z" for the angle we are finding. + other_axis: Axis label "X" or "Y or "Z" for the middle axis in the + convention. + data: Rotation matrices as tensor of shape (..., 3, 3). + horizontal: Whether we are looking for the angle for the third axis, + which means the relevant entries are in the same row of the + rotation matrix. If not, they are in the same column. + tait_bryan: Whether the first and third axes in the convention differ. + + Returns: + Euler Angles in radians for each matrix in data as a tensor + of shape (...). + """ + + i1, i2 = {"X": (2, 1), "Y": (0, 2), "Z": (1, 0)}[axis] + if horizontal: + i2, i1 = i1, i2 + even = (axis + other_axis) in ["XY", "YZ", "ZX"] + if horizontal == even: + return torch.atan2(data[..., i1], data[..., i2]) + if tait_bryan: + return torch.atan2(-data[..., i2], data[..., i1]) + return torch.atan2(data[..., i2], -data[..., i1]) + + +def _index_from_letter(letter: str) -> int: + if letter == "X": + return 0 + if letter == "Y": + return 1 + if letter == "Z": + return 2 + raise ValueError("letter must be either X, Y or Z.") + + +def matrix_to_euler_angles(matrix: torch.Tensor, convention: str = 'XYZ') -> torch.Tensor: + """ + Convert rotations given as rotation matrices to Euler angles in radians. + + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + convention: Convention string of three uppercase letters. + + Returns: + Euler angles in radians as tensor of shape (..., 3). 
+ """ + # if len(convention) != 3: + # raise ValueError("Convention must have 3 letters.") + # if convention[1] in (convention[0], convention[2]): + # raise ValueError(f"Invalid convention {convention}.") + # for letter in convention: + # if letter not in ("X", "Y", "Z"): + # raise ValueError(f"Invalid letter {letter} in convention string.") + # if matrix.size(-1) != 3 or matrix.size(-2) != 3: + # raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") + i0 = _index_from_letter(convention[0]) + i2 = _index_from_letter(convention[2]) + tait_bryan = i0 != i2 + if tait_bryan: + central_angle = torch.asin( + matrix[..., i0, i2] * (-1.0 if i0 - i2 in [-1, 2] else 1.0) + ) + else: + central_angle = torch.acos(matrix[..., i0, i0]) + + o = ( + _angle_from_tan( + convention[0], convention[1], matrix[..., i2], False, tait_bryan + ), + central_angle, + _angle_from_tan( + convention[2], convention[1], matrix[..., i0, :], True, tait_bryan + ), + ) + return torch.stack(o, -1) + +@torch.cuda.amp.autocast(enabled=False) +def _axis_angle_rotation(axis: str, angle: torch.Tensor) -> torch.Tensor: + """ + Return the rotation matrices for one of the rotations about an axis + of which Euler angles describe, for each value of the angle given. + Args: + axis: Axis label "X" or "Y or "Z". + angle: any shape tensor of Euler angles in radians + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + + cos = torch.cos(angle) + sin = torch.sin(angle) + one = torch.ones_like(angle) + zero = torch.zeros_like(angle) + + if axis == "X": + R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos) + elif axis == "Y": + R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos) + elif axis == "Z": + R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one) + else: + raise ValueError("letter must be either X, Y or Z.") + + return torch.stack(R_flat, -1).reshape(angle.shape + (3, 3)) + +@torch.cuda.amp.autocast(enabled=False) +def euler_angles_to_matrix(euler_angles: torch.Tensor, convention: str='XYZ') -> torch.Tensor: + """ + Convert rotations given as Euler angles in radians to rotation matrices. + Args: + euler_angles: Euler angles in radians as tensor of shape (..., 3). + convention: Convention string of three uppercase letters from + {"X", "Y", and "Z"}. + Returns: + Rotation matrices as tensor of shape (..., 3, 3). 
+ """ + + # print(euler_angles, euler_angles.dtype) + + if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3: + raise ValueError("Invalid input euler angles.") + if len(convention) != 3: + raise ValueError("Convention must have 3 letters.") + if convention[1] in (convention[0], convention[2]): + raise ValueError(f"Invalid convention {convention}.") + for letter in convention: + if letter not in ("X", "Y", "Z"): + raise ValueError(f"Invalid letter {letter} in convention string.") + matrices = [ + _axis_angle_rotation(c, e) + for c, e in zip(convention, torch.unbind(euler_angles, -1)) + ] + + return torch.matmul(torch.matmul(matrices[0], matrices[1]), matrices[2]) + + +@torch.cuda.amp.autocast(enabled=False) +def convert_poses(poses): + # poses: [B, 4, 4] + # return [B, 3], 4 rot, 3 trans + out = torch.empty(poses.shape[0], 6, dtype=torch.float32, device=poses.device) + out[:, :3] = matrix_to_euler_angles(poses[:, :3, :3]) + out[:, 3:] = poses[:, :3, 3] + return out + +@torch.cuda.amp.autocast(enabled=False) +def get_bg_coords(H, W, device): + X = torch.arange(H, device=device) / (H - 1) * 2 - 1 # in [-1, 1] + Y = torch.arange(W, device=device) / (W - 1) * 2 - 1 # in [-1, 1] + xs, ys = custom_meshgrid(X, Y) + bg_coords = torch.cat([xs.reshape(-1, 1), ys.reshape(-1, 1)], dim=-1).unsqueeze(0) # [1, H*W, 2], in [-1, 1] + return bg_coords + + +@torch.cuda.amp.autocast(enabled=False) +def get_rays(poses, intrinsics, H, W, N=-1, patch_size=1, rect=None): + ''' get rays + Args: + poses: [B, 4, 4], cam2world + intrinsics: [4] + H, W, N: int + Returns: + rays_o, rays_d: [B, N, 3] + inds: [B, N] + ''' + + device = poses.device + B = poses.shape[0] + fx, fy, cx, cy = intrinsics + + if rect is not None: + xmin, xmax, ymin, ymax = rect + N = (xmax - xmin) * (ymax - ymin) + + i, j = custom_meshgrid(torch.linspace(0, W-1, W, device=device), torch.linspace(0, H-1, H, device=device)) # float + i = i.t().reshape([1, H*W]).expand([B, H*W]) + 0.5 + j = j.t().reshape([1, H*W]).expand([B, H*W]) + 0.5 + + results = {} + + if N > 0: + N = min(N, H*W) + + if patch_size > 1: + + # random sample left-top cores. + # NOTE: this impl will lead to less sampling on the image corner pixels... but I don't have other ideas. 
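To make the note above concrete, a small illustrative check with toy sizes, counting how often each pixel can fall inside a patch whose top-left corner is drawn from the same range as in the code below:

import torch

H, W, patch_size = 6, 6, 2                       # toy sizes
coverage = torch.zeros(H, W)
for x in range(H - patch_size):                  # same exclusive range as randint(0, H - patch_size)
    for y in range(W - patch_size):
        coverage[x:x + patch_size, y:y + patch_size] += 1
# interior pixels are reachable from patch_size**2 corners, border pixels from fewer,
# and the last row/column from none -- the under-sampling the note above refers to
print(coverage)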
+ num_patch = N // (patch_size ** 2) + inds_x = torch.randint(0, H - patch_size, size=[num_patch], device=device) + inds_y = torch.randint(0, W - patch_size, size=[num_patch], device=device) + inds = torch.stack([inds_x, inds_y], dim=-1) # [np, 2] + + # create meshgrid for each patch + pi, pj = custom_meshgrid(torch.arange(patch_size, device=device), torch.arange(patch_size, device=device)) + offsets = torch.stack([pi.reshape(-1), pj.reshape(-1)], dim=-1) # [p^2, 2] + + inds = inds.unsqueeze(1) + offsets.unsqueeze(0) # [np, p^2, 2] + inds = inds.view(-1, 2) # [N, 2] + inds = inds[:, 0] * W + inds[:, 1] # [N], flatten + + inds = inds.expand([B, N]) + + # only get rays in the specified rect + elif rect is not None: + # assert B == 1 + mask = torch.zeros(H, W, dtype=torch.bool, device=device) + xmin, xmax, ymin, ymax = rect + mask[xmin:xmax, ymin:ymax] = 1 + inds = torch.where(mask.view(-1))[0] # [nzn] + inds = inds.unsqueeze(0) # [1, N] + + else: + inds = torch.randint(0, H*W, size=[N], device=device) # may duplicate + inds = inds.expand([B, N]) + + i = torch.gather(i, -1, inds) + j = torch.gather(j, -1, inds) + + + else: + inds = torch.arange(H*W, device=device).expand([B, H*W]) + + results['i'] = i + results['j'] = j + results['inds'] = inds + + zs = torch.ones_like(i) + xs = (i - cx) / fx * zs + ys = (j - cy) / fy * zs + directions = torch.stack((xs, ys, zs), dim=-1) + directions = directions / torch.norm(directions, dim=-1, keepdim=True) + + rays_d = directions @ poses[:, :3, :3].transpose(-1, -2) # (B, N, 3) + + rays_o = poses[..., :3, 3] # [B, 3] + rays_o = rays_o[..., None, :].expand_as(rays_d) # [B, N, 3] + + results['rays_o'] = rays_o + results['rays_d'] = rays_d + + return results + + +def seed_everything(seed): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + #torch.backends.cudnn.deterministic = True + #torch.backends.cudnn.benchmark = True + + +def torch_vis_2d(x, renormalize=False): + # x: [3, H, W] or [1, H, W] or [H, W] + import matplotlib.pyplot as plt + import numpy as np + import torch + + if isinstance(x, torch.Tensor): + if len(x.shape) == 3: + x = x.permute(1,2,0).squeeze() + x = x.detach().cpu().numpy() + + print(f'[torch_vis_2d] {x.shape}, {x.dtype}, {x.min()} ~ {x.max()}') + + x = x.astype(np.float32) + + # renormalize + if renormalize: + x = (x - x.min(axis=0, keepdims=True)) / (x.max(axis=0, keepdims=True) - x.min(axis=0, keepdims=True) + 1e-8) + + plt.imshow(x) + plt.show() + + +def extract_fields(bound_min, bound_max, resolution, query_func, S=128): + + X = torch.linspace(bound_min[0], bound_max[0], resolution).split(S) + Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(S) + Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(S) + + u = np.zeros([resolution, resolution, resolution], dtype=np.float32) + with torch.no_grad(): + for xi, xs in enumerate(X): + for yi, ys in enumerate(Y): + for zi, zs in enumerate(Z): + xx, yy, zz = custom_meshgrid(xs, ys, zs) + pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [S, 3] + val = query_func(pts).reshape(len(xs), len(ys), len(zs)).detach().cpu().numpy() # [S, 1] --> [x, y, z] + u[xi * S: xi * S + len(xs), yi * S: yi * S + len(ys), zi * S: zi * S + len(zs)] = val + return u + + +def extract_geometry(bound_min, bound_max, resolution, threshold, query_func): + #print('threshold: {}'.format(threshold)) + u = extract_fields(bound_min, bound_max, resolution, query_func) + 
+ #print(u.shape, u.max(), u.min(), np.percentile(u, 50)) + + vertices, triangles = mcubes.marching_cubes(u, threshold) + + b_max_np = bound_max.detach().cpu().numpy() + b_min_np = bound_min.detach().cpu().numpy() + + vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :] + return vertices, triangles + + +class PSNRMeter: + def __init__(self): + self.V = 0 + self.N = 0 + + def clear(self): + self.V = 0 + self.N = 0 + + def prepare_inputs(self, *inputs): + outputs = [] + for i, inp in enumerate(inputs): + if torch.is_tensor(inp): + inp = inp.detach().cpu().numpy() + outputs.append(inp) + + return outputs + + def update(self, preds, truths): + preds, truths = self.prepare_inputs(preds, truths) # [B, N, 3] or [B, H, W, 3], range in [0, 1] + + # simplified since max_pixel_value is 1 here. + psnr = -10 * np.log10(np.mean((preds - truths) ** 2)) + + self.V += psnr + self.N += 1 + + def measure(self): + return self.V / self.N + + def write(self, writer, global_step, prefix=""): + writer.add_scalar(os.path.join(prefix, "PSNR"), self.measure(), global_step) + + def report(self): + return f'PSNR = {self.measure():.6f}' + +class LPIPSMeter: + def __init__(self, net='alex', device=None): + self.V = 0 + self.N = 0 + self.net = net + + self.device = device if device is not None else torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.fn = lpips.LPIPS(net=net).eval().to(self.device) + + def clear(self): + self.V = 0 + self.N = 0 + + def prepare_inputs(self, *inputs): + outputs = [] + for i, inp in enumerate(inputs): + inp = inp.permute(0, 3, 1, 2).contiguous() # [B, 3, H, W] + inp = inp.to(self.device) + outputs.append(inp) + return outputs + + def update(self, preds, truths): + preds, truths = self.prepare_inputs(preds, truths) # [B, H, W, 3] --> [B, 3, H, W], range in [0, 1] + v = self.fn(truths, preds, normalize=True).item() # normalize=True: [0, 1] to [-1, 1] + self.V += v + self.N += 1 + + def measure(self): + return self.V / self.N + + def write(self, writer, global_step, prefix=""): + writer.add_scalar(os.path.join(prefix, f"LPIPS ({self.net})"), self.measure(), global_step) + + def report(self): + return f'LPIPS ({self.net}) = {self.measure():.6f}' + + +class LMDMeter: + def __init__(self, backend='dlib', region='mouth'): + self.backend = backend + self.region = region # mouth or face + + if self.backend == 'dlib': + import dlib + + # load checkpoint manually + self.predictor_path = './shape_predictor_68_face_landmarks.dat' + if not os.path.exists(self.predictor_path): + raise FileNotFoundError('Please download dlib checkpoint from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2') + + self.detector = dlib.get_frontal_face_detector() + self.predictor = dlib.shape_predictor(self.predictor_path) + + else: + + import face_alignment + + self.predictor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False) + + self.V = 0 + self.N = 0 + + def get_landmarks(self, img): + + if self.backend == 'dlib': + dets = self.detector(img, 1) + for det in dets: + shape = self.predictor(img, det) + # ref: https://github.com/PyImageSearch/imutils/blob/c12f15391fcc945d0d644b85194b8c044a392e0a/imutils/face_utils/helpers.py + lms = np.zeros((68, 2), dtype=np.int32) + for i in range(0, 68): + lms[i, 0] = shape.part(i).x + lms[i, 1] = shape.part(i).y + break + + else: + lms = self.predictor.get_landmarks(img)[-1] + + # self.vis_landmarks(img, lms) + lms = lms.astype(np.float32) + + return lms + + def vis_landmarks(self, img, 
lms): + plt.imshow(img) + plt.plot(lms[48:68, 0], lms[48:68, 1], marker='o', markersize=1, linestyle='-', lw=2) + plt.show() + + def clear(self): + self.V = 0 + self.N = 0 + + def prepare_inputs(self, *inputs): + outputs = [] + for i, inp in enumerate(inputs): + inp = inp.detach().cpu().numpy() + inp = (inp * 255).astype(np.uint8) + outputs.append(inp) + return outputs + + def update(self, preds, truths): + # assert B == 1 + preds, truths = self.prepare_inputs(preds[0], truths[0]) # [H, W, 3] numpy array + + # get lms + lms_pred = self.get_landmarks(preds) + lms_truth = self.get_landmarks(truths) + + if self.region == 'mouth': + lms_pred = lms_pred[48:68] + lms_truth = lms_truth[48:68] + + # avarage + lms_pred = lms_pred - lms_pred.mean(0) + lms_truth = lms_truth - lms_truth.mean(0) + + # distance + dist = np.sqrt(((lms_pred - lms_truth) ** 2).sum(1)).mean(0) + + self.V += dist + self.N += 1 + + def measure(self): + return self.V / self.N + + def write(self, writer, global_step, prefix=""): + writer.add_scalar(os.path.join(prefix, f"LMD ({self.backend})"), self.measure(), global_step) + + def report(self): + return f'LMD ({self.backend}) = {self.measure():.6f}' + + +class Trainer(object): + def __init__(self, + name, # name of this experiment + opt, # extra conf + model, # network + criterion=None, # loss function, if None, assume inline implementation in train_step + optimizer=None, # optimizer + ema_decay=None, # if use EMA, set the decay + ema_update_interval=1000, # update ema per $ training steps. + lr_scheduler=None, # scheduler + metrics=[], # metrics for evaluation, if None, use val_loss to measure performance, else use the first metric. + local_rank=0, # which GPU am I + world_size=1, # total num of GPUs + device=None, # device to use, usually setting to None is OK. 
(auto choose device) + mute=False, # whether to mute all print + fp16=False, # amp optimize level + eval_interval=1, # eval once every $ epoch + max_keep_ckpt=2, # max num of saved ckpts in disk + workspace='workspace', # workspace to save logs & ckpts + best_mode='min', # the smaller/larger result, the better + use_loss_as_metric=True, # use loss as the first metric + report_metric_at_train=False, # also report metrics at training + use_checkpoint="latest", # which ckpt to use at init time + use_tensorboardX=True, # whether to use tensorboard for logging + scheduler_update_every_step=False, # whether to call scheduler.step() after every train step + ): + + self.name = name + self.opt = opt + self.mute = mute + self.metrics = metrics + self.local_rank = local_rank + self.world_size = world_size + self.workspace = workspace + self.ema_decay = ema_decay + self.ema_update_interval = ema_update_interval + self.fp16 = fp16 + self.best_mode = best_mode + self.use_loss_as_metric = use_loss_as_metric + self.report_metric_at_train = report_metric_at_train + self.max_keep_ckpt = max_keep_ckpt + self.eval_interval = eval_interval + self.use_checkpoint = use_checkpoint + self.use_tensorboardX = use_tensorboardX + self.flip_finetune_lips = self.opt.finetune_lips + self.flip_init_lips = self.opt.init_lips + self.time_stamp = time.strftime("%Y-%m-%d_%H-%M-%S") + self.scheduler_update_every_step = scheduler_update_every_step + self.device = device if device is not None else torch.device(f'cuda:{local_rank}' if torch.cuda.is_available() else 'cpu') + self.console = Console() + + model.to(self.device) + if self.world_size > 1: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank]) + self.model = model + + if isinstance(criterion, nn.Module): + criterion.to(self.device) + self.criterion = criterion + + if optimizer is None: + self.optimizer = optim.Adam(self.model.parameters(), lr=0.001, weight_decay=5e-4) # naive adam + else: + self.optimizer = optimizer(self.model) + + if lr_scheduler is None: + self.lr_scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=lambda epoch: 1) # fake scheduler + else: + self.lr_scheduler = lr_scheduler(self.optimizer) + + if ema_decay is not None: + self.ema = ExponentialMovingAverage(self.model.parameters(), decay=ema_decay) + else: + self.ema = None + + self.scaler = torch.cuda.amp.GradScaler(enabled=self.fp16) + + # optionally use LPIPS loss for patch-based training + if self.opt.patch_size > 1 or self.opt.finetune_lips or True: + import lpips + # self.criterion_lpips_vgg = lpips.LPIPS(net='vgg').to(self.device) + self.criterion_lpips_alex = lpips.LPIPS(net='alex').to(self.device) + + # variable init + self.epoch = 0 + self.global_step = 0 + self.local_step = 0 + self.stats = { + "loss": [], + "valid_loss": [], + "results": [], # metrics[0], or valid_loss + "checkpoints": [], # record path of saved ckpt, to automatically remove old ckpt + "best_result": None, + } + + # auto fix + if len(metrics) == 0 or self.use_loss_as_metric: + self.best_mode = 'min' + + # workspace prepare + self.log_ptr = None + if self.workspace is not None: + os.makedirs(self.workspace, exist_ok=True) + self.log_path = os.path.join(workspace, f"log_{self.name}.txt") + self.log_ptr = open(self.log_path, "a+") + + self.ckpt_path = os.path.join(self.workspace, 'checkpoints') + self.best_path = f"{self.ckpt_path}/{self.name}.pth" + os.makedirs(self.ckpt_path, exist_ok=True) + + self.log(f'[INFO] 
Trainer: {self.name} | {self.time_stamp} | {self.device} | {"fp16" if self.fp16 else "fp32"} | {self.workspace}') + self.log(f'[INFO] #parameters: {sum([p.numel() for p in model.parameters() if p.requires_grad])}') + + if self.workspace is not None: + if self.use_checkpoint == "scratch": + self.log("[INFO] Training from scratch ...") + elif self.use_checkpoint == "latest": + self.log("[INFO] Loading latest checkpoint ...") + self.load_checkpoint() + elif self.use_checkpoint == "latest_model": + self.log("[INFO] Loading latest checkpoint (model only)...") + self.load_checkpoint(model_only=True) + elif self.use_checkpoint == "best": + if os.path.exists(self.best_path): + self.log("[INFO] Loading best checkpoint ...") + self.load_checkpoint(self.best_path) + else: + self.log(f"[INFO] {self.best_path} not found, loading latest ...") + self.load_checkpoint() + else: # path to ckpt + self.log(f"[INFO] Loading {self.use_checkpoint} ...") + self.load_checkpoint(self.use_checkpoint) + + def __del__(self): + if self.log_ptr: + self.log_ptr.close() + + + def log(self, *args, **kwargs): + if self.local_rank == 0: + if not self.mute: + #print(*args) + self.console.print(*args, **kwargs) + if self.log_ptr: + print(*args, file=self.log_ptr) + self.log_ptr.flush() # write immediately to file + + ### ------------------------------ + + def train_step(self, data): + + rays_o = data['rays_o'] # [B, N, 3] + rays_d = data['rays_d'] # [B, N, 3] + bg_coords = data['bg_coords'] # [1, N, 2] + poses = data['poses'] # [B, 6] + face_mask = data['face_mask'] # [B, N] + eye_mask = data['eye_mask'] # [B, N] + lhalf_mask = data['lhalf_mask'] + eye = data['eye'] # [B, 1] + auds = data['auds'] # [B, 29, 16] + index = data['index'] # [B] + + if not self.opt.torso: + rgb = data['images'] # [B, N, 3] + else: + rgb = data['bg_torso_color'] + + B, N, C = rgb.shape + + if self.opt.color_space == 'linear': + rgb[..., :3] = srgb_to_linear(rgb[..., :3]) + + bg_color = data['bg_color'] + + if not self.opt.torso: + outputs = self.model.render(rays_o, rays_d, auds, bg_coords, poses, eye=eye, index=index, staged=False, bg_color=bg_color, perturb=True, force_all_rays=False if (self.opt.patch_size <= 1 and not self.opt.train_camera) else True, **vars(self.opt)) + else: + outputs = self.model.render_torso(rays_o, rays_d, auds, bg_coords, poses, eye=eye, index=index, staged=False, bg_color=bg_color, perturb=True, force_all_rays=False if (self.opt.patch_size <= 1 and not self.opt.train_camera) else True, **vars(self.opt)) + + if not self.opt.torso: + pred_rgb = outputs['image'] + else: + pred_rgb = outputs['torso_color'] + + + # loss factor + step_factor = min(self.global_step / self.opt.iters, 1.0) + + # MSE loss + loss = self.criterion(pred_rgb, rgb).mean(-1) # [B, N, 3] --> [B, N] + + if self.opt.torso: + loss = loss.mean() + loss += ((1 - self.model.anchor_points[:, 3])**2).mean() + return pred_rgb, rgb, loss + + # camera optim regularization + # if self.opt.train_camera: + # cam_reg = self.model.camera_dR[index].abs().mean() + self.model.camera_dT[index].abs().mean() + # loss = loss + 1e-2 * cam_reg + + if self.opt.unc_loss and not self.flip_finetune_lips: + alpha = 0.2 + uncertainty = outputs['uncertainty'] # [N], abs sum + beta = uncertainty + 1 + + unc_weight = F.softmax(uncertainty, dim=-1) * N + # print(unc_weight.shape, unc_weight.max(), unc_weight.min()) + loss *= alpha + (1-alpha)*((1 - step_factor) + step_factor * unc_weight.detach()).clamp(0, 10) + # loss *= unc_weight.detach() + + beta = uncertainty + 1 + norm_rgb = 
torch.norm((pred_rgb - rgb), dim=-1).detach() + loss_u = norm_rgb / (2*beta**2) + (torch.log(beta)**2) / 2 + loss_u *= face_mask.view(-1) + loss += step_factor * loss_u + + loss_static_uncertainty = (uncertainty * (~face_mask.view(-1))) + loss += 1e-3 * step_factor * loss_static_uncertainty + + # patch-based rendering + if self.opt.patch_size > 1 and not self.opt.finetune_lips: + rgb = rgb.view(-1, self.opt.patch_size, self.opt.patch_size, 3).permute(0, 3, 1, 2).contiguous() + pred_rgb = pred_rgb.view(-1, self.opt.patch_size, self.opt.patch_size, 3).permute(0, 3, 1, 2).contiguous() + + # torch_vis_2d(rgb[0]) + # torch_vis_2d(pred_rgb[0]) + + # LPIPS loss ? + loss_lpips = self.criterion_lpips_alex(pred_rgb, rgb) + loss = loss + 0.1 * loss_lpips + + # lips finetune + if self.opt.finetune_lips: + xmin, xmax, ymin, ymax = data['rect'] + rgb = rgb.view(-1, xmax - xmin, ymax - ymin, 3).permute(0, 3, 1, 2).contiguous() + pred_rgb = pred_rgb.view(-1, xmax - xmin, ymax - ymin, 3).permute(0, 3, 1, 2).contiguous() + + # torch_vis_2d(rgb[0]) + # torch_vis_2d(pred_rgb[0]) + + # LPIPS loss + loss = loss + 0.01 * self.criterion_lpips_alex(pred_rgb, rgb) + + # flip every step... if finetune lips + if self.flip_finetune_lips: + self.opt.finetune_lips = not self.opt.finetune_lips + + loss = loss.mean() + + # weights_sum loss + # entropy to encourage weights_sum to be 0 or 1. + if self.opt.torso: + alphas = outputs['torso_alpha'].clamp(1e-5, 1 - 1e-5) + # alphas = alphas ** 2 # skewed entropy, favors 0 over 1 + loss_ws = - alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas) + loss = loss + 1e-4 * loss_ws.mean() + + else: + alphas = outputs['weights_sum'].clamp(1e-5, 1 - 1e-5) + loss_ws = - alphas * torch.log2(alphas) - (1 - alphas) * torch.log2(1 - alphas) + loss = loss + 1e-4 * loss_ws.mean() + + # aud att loss (regions out of face should be static) + if self.opt.amb_aud_loss and not self.opt.torso: + ambient_aud = outputs['ambient_aud'] + loss_amb_aud = (ambient_aud * (~face_mask.view(-1))).mean() + # gradually increase it + lambda_amb = step_factor * self.opt.lambda_amb + loss += lambda_amb * loss_amb_aud + + # eye att loss + if self.opt.amb_eye_loss and not self.opt.torso: + ambient_eye = outputs['ambient_eye'] / self.opt.max_steps + + loss_cross = ((ambient_eye * ambient_aud.detach())*face_mask.view(-1)).mean() + loss += lambda_amb * loss_cross + + # regularize + if self.global_step % 16 == 0 and not self.flip_finetune_lips: + xyzs, dirs, enc_a, ind_code, eye = outputs['rays'] + xyz_delta = (torch.rand(size=xyzs.shape, dtype=xyzs.dtype, device=xyzs.device) * 2 - 1) * 1e-3 + with torch.no_grad(): + sigmas_raw, rgbs_raw, ambient_aud_raw, ambient_eye_raw, unc_raw = self.model(xyzs, dirs, enc_a.detach(), ind_code.detach(), eye) + sigmas_reg, rgbs_reg, ambient_aud_reg, ambient_eye_reg, unc_reg = self.model(xyzs+xyz_delta, dirs, enc_a.detach(), ind_code.detach(), eye) + + lambda_reg = step_factor * 1e-5 + reg_loss = 0 + if self.opt.unc_loss: + reg_loss += self.criterion(unc_raw, unc_reg).mean() + if self.opt.amb_aud_loss: + reg_loss += self.criterion(ambient_aud_raw, ambient_aud_reg).mean() + if self.opt.amb_eye_loss: + reg_loss += self.criterion(ambient_eye_raw, ambient_eye_reg).mean() + + loss += reg_loss * lambda_reg + + return pred_rgb, rgb, loss + + + def eval_step(self, data): + + rays_o = data['rays_o'] # [B, N, 3] + rays_d = data['rays_d'] # [B, N, 3] + bg_coords = data['bg_coords'] # [1, N, 2] + poses = data['poses'] # [B, 7] + + images = data['images'] # [B, H, W, 3/4] + auds = 
data['auds'] + index = data['index'] # [B] + eye = data['eye'] # [B, 1] + + B, H, W, C = images.shape + + if self.opt.color_space == 'linear': + images[..., :3] = srgb_to_linear(images[..., :3]) + + # eval with fixed background color + # bg_color = 1 + bg_color = data['bg_color'] + + outputs = self.model.render(rays_o, rays_d, auds, bg_coords, poses, eye=eye, index=index, staged=True, bg_color=bg_color, perturb=False, **vars(self.opt)) + + pred_rgb = outputs['image'].reshape(B, H, W, 3) + pred_depth = outputs['depth'].reshape(B, H, W) + pred_ambient_aud = outputs['ambient_aud'].reshape(B, H, W) + pred_ambient_eye = outputs['ambient_eye'].reshape(B, H, W) + pred_uncertainty = outputs['uncertainty'].reshape(B, H, W) + + loss_raw = self.criterion(pred_rgb, images) + loss = loss_raw.mean() + + return pred_rgb, pred_depth, pred_ambient_aud, pred_ambient_eye, pred_uncertainty, images, loss, loss_raw + + # moved out bg_color and perturb for more flexible control... + def test_step(self, data, bg_color=None, perturb=False): + + rays_o = data['rays_o'] # [B, N, 3] + rays_d = data['rays_d'] # [B, N, 3] + bg_coords = data['bg_coords'] # [1, N, 2] + poses = data['poses'] # [B, 7] + + auds = data['auds'] # [B, 29, 16] + index = data['index'] + H, W = data['H'], data['W'] + + # allow using a fixed eye area (avoid eye blink) at test + if self.opt.exp_eye and self.opt.fix_eye >= 0: + eye = torch.FloatTensor([self.opt.fix_eye]).view(1, 1).to(self.device) + else: + eye = data['eye'] # [B, 1] + + if bg_color is not None: + bg_color = bg_color.to(self.device) + else: + bg_color = data['bg_color'] + + self.model.testing = True + outputs = self.model.render(rays_o, rays_d, auds, bg_coords, poses, eye=eye, index=index, staged=True, bg_color=bg_color, perturb=perturb, **vars(self.opt)) + self.model.testing = False + + pred_rgb = outputs['image'].reshape(-1, H, W, 3) + pred_depth = outputs['depth'].reshape(-1, H, W) + + return pred_rgb, pred_depth + + + def save_mesh(self, save_path=None, resolution=256, threshold=10): + + if save_path is None: + save_path = os.path.join(self.workspace, 'meshes', f'{self.name}_{self.epoch}.ply') + + self.log(f"==> Saving mesh to {save_path}") + + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + def query_func(pts): + with torch.no_grad(): + with torch.cuda.amp.autocast(enabled=self.fp16): + sigma = self.model.density(pts.to(self.device))['sigma'] + return sigma + + vertices, triangles = extract_geometry(self.model.aabb_infer[:3], self.model.aabb_infer[3:], resolution=resolution, threshold=threshold, query_func=query_func) + + mesh = trimesh.Trimesh(vertices, triangles, process=False) # important, process=True leads to seg fault... 
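(For orientation: `extract_geometry` samples the density field on a `resolution`^3 grid inside the inferred AABB and extracts an iso-surface at `threshold`. Below is a minimal standalone sketch of that idea, assuming scikit-image and a chunked `query_func`; the actual helper in nerf_triplane/utils.py may differ in details.)

import numpy as np
import torch
from skimage import measure

def extract_geometry_sketch(bound_min, bound_max, resolution, threshold, query_func):
    # sample sigma on a regular grid spanning the AABB (bounds are array-likes of 3 scalars)
    lo = np.array([float(b) for b in bound_min]); hi = np.array([float(b) for b in bound_max])
    axes = [np.linspace(lo[i], hi[i], resolution) for i in range(3)]
    pts = np.stack(np.meshgrid(*axes, indexing='ij'), axis=-1).reshape(-1, 3).astype(np.float32)
    sigmas = []
    for chunk in np.array_split(pts, max(1, len(pts) // 65536)):
        sigmas.append(query_func(torch.from_numpy(chunk)).float().cpu().numpy())
    vol = np.concatenate(sigmas).reshape(resolution, resolution, resolution)
    # marching cubes at the density threshold, then map voxel indices back to world coordinates
    verts, faces, _, _ = measure.marching_cubes(vol, level=threshold)
    verts = verts / (resolution - 1) * (hi - lo) + lo
    return verts, faces.astype(np.int32)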
+ mesh.export(save_path) + + self.log(f"==> Finished saving mesh.") + + ### ------------------------------ + + def train(self, train_loader, valid_loader, max_epochs): + if self.use_tensorboardX and self.local_rank == 0: + self.writer = tensorboardX.SummaryWriter(os.path.join(self.workspace, "run", self.name)) + + # mark untrained region (i.e., not covered by any camera from the training dataset) + if self.model.cuda_ray: + self.model.mark_untrained_grid(train_loader._data.poses, train_loader._data.intrinsics) + + for epoch in range(self.epoch + 1, max_epochs + 1): + self.epoch = epoch + + self.train_one_epoch(train_loader) + + if self.workspace is not None and self.local_rank == 0: + self.save_checkpoint(full=True, best=False) + + if self.epoch % self.eval_interval == 0: + self.evaluate_one_epoch(valid_loader) + self.save_checkpoint(full=False, best=True) + + if self.use_tensorboardX and self.local_rank == 0: + self.writer.close() + + def evaluate(self, loader, name=None): + self.use_tensorboardX, use_tensorboardX = False, self.use_tensorboardX + self.evaluate_one_epoch(loader, name) + self.use_tensorboardX = use_tensorboardX + + def test(self, loader, save_path=None, name=None, write_image=False): + + if save_path is None: + save_path = os.path.join(self.workspace, 'results') + + if name is None: + name = f'{self.name}_ep{self.epoch:04d}' + + os.makedirs(save_path, exist_ok=True) + + self.log(f"==> Start Test, save results to {save_path}") + + pbar = tqdm.tqdm(total=len(loader) * loader.batch_size, bar_format='{percentage:3.0f}% {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]') + self.model.eval() + + all_preds = [] + + with torch.no_grad(): + + for i, data in enumerate(loader): + + with torch.cuda.amp.autocast(enabled=self.fp16): + preds, preds_depth = self.test_step(data) + + path = os.path.join(save_path, f'{name}_{i:04d}_rgb.png') + path_depth = os.path.join(save_path, f'{name}_{i:04d}_depth.png') + + #self.log(f"[INFO] saving test image to {path}") + + if self.opt.color_space == 'linear': + preds = linear_to_srgb(preds) + + pred = preds[0].detach().cpu().numpy() + pred = (pred * 255).astype(np.uint8) + + pred_depth = preds_depth[0].detach().cpu().numpy() + pred_depth = (pred_depth * 255).astype(np.uint8) + + if write_image: + imageio.imwrite(path, pred) + imageio.imwrite(path_depth, pred_depth) + + all_preds.append(pred) + + pbar.update(loader.batch_size) + + # write video + all_preds = np.stack(all_preds, axis=0) + imageio.mimwrite(os.path.join(save_path, f'{name}.mp4'), all_preds, fps=25, quality=8, macro_block_size=1) + + self.log(f"==> Finished Test.") + + # [GUI] just train for 16 steps, without any other overhead that may slow down rendering. 
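(For context, a hypothetical GUI front end would simply call `train_gui` once per rendered frame and display the returned loss and learning rate; the driver below is illustrative only and not part of this file.)

def run_training_frames(trainer, train_loader, frames=100):
    # drive training from a UI loop: 16 optimizer steps per displayed frame
    for _ in range(frames):
        out = trainer.train_gui(train_loader, step=16)
        print(f"loss={out['loss']:.4f}  lr={out['lr']:.6f}")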
+ def train_gui(self, train_loader, step=16): + + self.model.train() + + total_loss = torch.tensor([0], dtype=torch.float32, device=self.device) + + loader = iter(train_loader) + + # mark untrained grid + if self.global_step == 0: + self.model.mark_untrained_grid(train_loader._data.poses, train_loader._data.intrinsics) + + for _ in range(step): + + # mimic an infinite loop dataloader (in case the total dataset is smaller than step) + try: + data = next(loader) + except StopIteration: + loader = iter(train_loader) + data = next(loader) + + # update grid every 16 steps + if self.model.cuda_ray and self.global_step % self.opt.update_extra_interval == 0: + with torch.cuda.amp.autocast(enabled=self.fp16): + self.model.update_extra_state() + + self.global_step += 1 + + self.optimizer.zero_grad() + + with torch.cuda.amp.autocast(enabled=self.fp16): + preds, truths, loss = self.train_step(data) + + self.scaler.scale(loss).backward() + self.scaler.step(self.optimizer) + self.scaler.update() + + if self.scheduler_update_every_step: + self.lr_scheduler.step() + + total_loss += loss.detach() + + if self.ema is not None and self.global_step % self.ema_update_interval == 0: + self.ema.update() + + average_loss = total_loss.item() / step + + if not self.scheduler_update_every_step: + if isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): + self.lr_scheduler.step(average_loss) + else: + self.lr_scheduler.step() + + outputs = { + 'loss': average_loss, + 'lr': self.optimizer.param_groups[0]['lr'], + } + + return outputs + + # [GUI] test on a single image + def test_gui(self, pose, intrinsics, W, H, auds, eye=None, index=0, bg_color=None, spp=1, downscale=1): + + # render resolution (may need downscale to for better frame rate) + rH = int(H * downscale) + rW = int(W * downscale) + intrinsics = intrinsics * downscale + + if auds is not None: + auds = auds.to(self.device) + + pose = torch.from_numpy(pose).unsqueeze(0).to(self.device) + rays = get_rays(pose, intrinsics, rH, rW, -1) + + bg_coords = get_bg_coords(rH, rW, self.device) + + if eye is not None: + eye = torch.FloatTensor([eye]).view(1, 1).to(self.device) + + data = { + 'rays_o': rays['rays_o'], + 'rays_d': rays['rays_d'], + 'H': rH, + 'W': rW, + 'auds': auds, + 'index': [index], # support choosing index for individual codes + 'eye': eye, + 'poses': pose, + 'bg_coords': bg_coords, + } + + self.model.eval() + + if self.ema is not None: + self.ema.store() + self.ema.copy_to() + + with torch.no_grad(): + with torch.cuda.amp.autocast(enabled=self.fp16): + # here spp is used as perturb random seed! + # face: do not perturb for the first spp, else lead to scatters. + preds, preds_depth = self.test_step(data, bg_color=bg_color, perturb=False if spp == 1 else spp) + + if self.ema is not None: + self.ema.restore() + + # interpolation to the original resolution + if downscale != 1: + # TODO: have to permute twice with torch... 
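(`F.interpolate` expects channel-first input, hence the double permute below; a minimal standalone illustration of the same NHWC -> NCHW -> NHWC round trip:)

import torch
import torch.nn.functional as F

x = torch.rand(1, 32, 32, 3)                              # NHWC, like `preds` from test_step
y = F.interpolate(x.permute(0, 3, 1, 2), size=(64, 64),   # NCHW for interpolate
                  mode='bilinear', align_corners=False)
y = y.permute(0, 2, 3, 1).contiguous()                    # back to NHWC, shape [1, 64, 64, 3]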
+ preds = F.interpolate(preds.permute(0, 3, 1, 2), size=(H, W), mode='bilinear').permute(0, 2, 3, 1).contiguous() + preds_depth = F.interpolate(preds_depth.unsqueeze(1), size=(H, W), mode='nearest').squeeze(1) + + if self.opt.color_space == 'linear': + preds = linear_to_srgb(preds) + + pred = preds[0].detach().cpu().numpy() + pred_depth = preds_depth[0].detach().cpu().numpy() + + outputs = { + 'image': pred, + 'depth': pred_depth, + } + + return outputs + + # [GUI] test with provided data + def test_gui_with_data(self, data, W, H): + + self.model.eval() + + if self.ema is not None: + self.ema.store() + self.ema.copy_to() + + with torch.no_grad(): + with torch.cuda.amp.autocast(enabled=self.fp16): + # here spp is used as perturb random seed! + # face: do not perturb for the first spp, else lead to scatters. + preds, preds_depth = self.test_step(data, perturb=False) + + if self.ema is not None: + self.ema.restore() + + if self.opt.color_space == 'linear': + preds = linear_to_srgb(preds) + + # the H/W in data may be differnt to GUI, so we still need to resize... + preds = F.interpolate(preds.permute(0, 3, 1, 2), size=(H, W), mode='bilinear').permute(0, 2, 3, 1).contiguous() + preds_depth = F.interpolate(preds_depth.unsqueeze(1), size=(H, W), mode='nearest').squeeze(1) + + pred = preds[0].detach().cpu().numpy() + pred_depth = preds_depth[0].detach().cpu().numpy() + + outputs = { + 'image': pred, + 'depth': pred_depth, + } + + return outputs + + def train_one_epoch(self, loader): + self.log(f"==> Start Training Epoch {self.epoch}, lr={self.optimizer.param_groups[0]['lr']:.6f} ...") + + total_loss = 0 + if self.local_rank == 0 and self.report_metric_at_train: + for metric in self.metrics: + metric.clear() + + self.model.train() + + # distributedSampler: must call set_epoch() to shuffle indices across multiple epochs + # ref: https://pytorch.org/docs/stable/data.html + if self.world_size > 1: + loader.sampler.set_epoch(self.epoch) + + if self.local_rank == 0: + pbar = tqdm.tqdm(total=len(loader) * loader.batch_size, mininterval=1, bar_format='{desc}: {percentage:3.0f}% {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]') + + self.local_step = 0 + + for data in loader: + # update grid every 16 steps + if self.model.cuda_ray and self.global_step % self.opt.update_extra_interval == 0: + with torch.cuda.amp.autocast(enabled=self.fp16): + self.model.update_extra_state() + + self.local_step += 1 + self.global_step += 1 + + self.optimizer.zero_grad() + + with torch.cuda.amp.autocast(enabled=self.fp16): + preds, truths, loss = self.train_step(data) + + self.scaler.scale(loss).backward() + self.scaler.step(self.optimizer) + self.scaler.update() + + if self.scheduler_update_every_step: + self.lr_scheduler.step() + + loss_val = loss.item() + total_loss += loss_val + + if self.ema is not None and self.global_step % self.ema_update_interval == 0: + self.ema.update() + + if self.local_rank == 0: + if self.report_metric_at_train: + for metric in self.metrics: + metric.update(preds, truths) + + if self.use_tensorboardX: + self.writer.add_scalar("train/loss", loss_val, self.global_step) + self.writer.add_scalar("train/lr", self.optimizer.param_groups[0]['lr'], self.global_step) + + if self.scheduler_update_every_step: + pbar.set_description(f"loss={loss_val:.4f} ({total_loss/self.local_step:.4f}), lr={self.optimizer.param_groups[0]['lr']:.6f}") + else: + pbar.set_description(f"loss={loss_val:.4f} ({total_loss/self.local_step:.4f})") + pbar.update(loader.batch_size) + + average_loss = total_loss / 
self.local_step + self.stats["loss"].append(average_loss) + + if self.local_rank == 0: + pbar.close() + if self.report_metric_at_train: + for metric in self.metrics: + self.log(metric.report(), style="red") + if self.use_tensorboardX: + metric.write(self.writer, self.epoch, prefix="train") + metric.clear() + + if not self.scheduler_update_every_step: + if isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): + self.lr_scheduler.step(average_loss) + else: + self.lr_scheduler.step() + + self.log(f"==> Finished Epoch {self.epoch}.") + + + def evaluate_one_epoch(self, loader, name=None): + self.log(f"++> Evaluate at epoch {self.epoch} ...") + + if name is None: + name = f'{self.name}_ep{self.epoch:04d}' + + total_loss = 0 + if self.local_rank == 0: + for metric in self.metrics: + metric.clear() + + self.model.eval() + + if self.ema is not None: + self.ema.store() + self.ema.copy_to() + + if self.local_rank == 0: + pbar = tqdm.tqdm(total=len(loader) * loader.batch_size, bar_format='{desc}: {percentage:3.0f}% {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]') + + with torch.no_grad(): + self.local_step = 0 + + for data in loader: + self.local_step += 1 + + with torch.cuda.amp.autocast(enabled=self.fp16): + preds, preds_depth, pred_ambient_aud, pred_ambient_eye, pred_uncertainty, truths, loss, loss_raw = self.eval_step(data) + + loss_val = loss.item() + total_loss += loss_val + + # only rank = 0 will perform evaluation. + if self.local_rank == 0: + + for metric in self.metrics: + metric.update(preds, truths) + + # save image + save_path = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_rgb.png') + save_path_depth = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_depth.png') + # save_path_error = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_errormap.png') + save_path_ambient_aud = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_aud.png') + save_path_ambient_eye = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_eye.png') + save_path_uncertainty = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_uncertainty.png') + #save_path_gt = os.path.join(self.workspace, 'validation', f'{name}_{self.local_step:04d}_gt.png') + + #self.log(f"==> Saving validation image to {save_path}") + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + if self.opt.color_space == 'linear': + preds = linear_to_srgb(preds) + + pred = preds[0].detach().cpu().numpy() + pred_depth = preds_depth[0].detach().cpu().numpy() + # loss_raw = loss_raw[0].mean(-1).detach().cpu().numpy() + # loss_raw = (loss_raw - np.min(loss_raw)) / (np.max(loss_raw) - np.min(loss_raw)) + pred_ambient_aud = pred_ambient_aud[0].detach().cpu().numpy() + pred_ambient_aud /= np.max(pred_ambient_aud) + pred_ambient_eye = pred_ambient_eye[0].detach().cpu().numpy() + pred_ambient_eye /= np.max(pred_ambient_eye) + # pred_ambient = pred_ambient / 16 + # print(pred_ambient.shape) + pred_uncertainty = pred_uncertainty[0].detach().cpu().numpy() + # pred_uncertainty = (pred_uncertainty - np.min(pred_uncertainty)) / (np.max(pred_uncertainty) - np.min(pred_uncertainty)) + pred_uncertainty /= np.max(pred_uncertainty) + + cv2.imwrite(save_path, cv2.cvtColor((pred * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)) + + if not self.opt.torso: + cv2.imwrite(save_path_depth, (pred_depth * 255).astype(np.uint8)) + # cv2.imwrite(save_path_error, (loss_raw * 255).astype(np.uint8)) + 
cv2.imwrite(save_path_ambient_aud, (pred_ambient_aud * 255).astype(np.uint8)) + cv2.imwrite(save_path_ambient_eye, (pred_ambient_eye * 255).astype(np.uint8)) + cv2.imwrite(save_path_uncertainty, (pred_uncertainty * 255).astype(np.uint8)) + #cv2.imwrite(save_path_gt, cv2.cvtColor((linear_to_srgb(truths[0].detach().cpu().numpy()) * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)) + + pbar.set_description(f"loss={loss_val:.4f} ({total_loss/self.local_step:.4f})") + pbar.update(loader.batch_size) + + + average_loss = total_loss / self.local_step + self.stats["valid_loss"].append(average_loss) + + if self.local_rank == 0: + pbar.close() + if not self.use_loss_as_metric and len(self.metrics) > 0: + result = self.metrics[0].measure() + self.stats["results"].append(result if self.best_mode == 'min' else - result) # if max mode, use -result + else: + self.stats["results"].append(average_loss) # if no metric, choose best by min loss + + for metric in self.metrics: + self.log(metric.report(), style="blue") + if self.use_tensorboardX: + metric.write(self.writer, self.epoch, prefix="evaluate") + metric.clear() + + if self.ema is not None: + self.ema.restore() + + self.log(f"++> Evaluate epoch {self.epoch} Finished.") + + def save_checkpoint(self, name=None, full=False, best=False, remove_old=True): + + if name is None: + name = f'{self.name}_ep{self.epoch:04d}' + + state = { + 'epoch': self.epoch, + 'global_step': self.global_step, + 'stats': self.stats, + } + + + state['mean_count'] = self.model.mean_count + state['mean_density'] = self.model.mean_density + state['mean_density_torso'] = self.model.mean_density_torso + + if full: + state['optimizer'] = self.optimizer.state_dict() + state['lr_scheduler'] = self.lr_scheduler.state_dict() + state['scaler'] = self.scaler.state_dict() + if self.ema is not None: + state['ema'] = self.ema.state_dict() + + if not best: + + state['model'] = self.model.state_dict() + + file_path = f"{self.ckpt_path}/{name}.pth" + + if remove_old: + self.stats["checkpoints"].append(file_path) + + if len(self.stats["checkpoints"]) > self.max_keep_ckpt: + old_ckpt = self.stats["checkpoints"].pop(0) + if os.path.exists(old_ckpt): + os.remove(old_ckpt) + + torch.save(state, file_path) + + else: + if len(self.stats["results"]) > 0: + # always save new as best... (since metric cannot really reflect performance...) 
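(A hypothetical alternative, if one preferred to gate on the stored validation results rather than the unconditional `if True:` below; entries of self.stats['results'] are negated for 'max' mode when appended, so lower is always better:)

def is_new_best(results):
    # results == self.stats['results']; the latest entry is best when it is the minimum so far
    return len(results) > 0 and results[-1] == min(results)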
+ if True: + + # save ema results + if self.ema is not None: + self.ema.store() + self.ema.copy_to() + + state['model'] = self.model.state_dict() + + # we don't consider continued training from the best ckpt, so we discard the unneeded density_grid to save some storage (especially important for dnerf) + if 'density_grid' in state['model']: + del state['model']['density_grid'] + + if self.ema is not None: + self.ema.restore() + + torch.save(state, self.best_path) + else: + self.log(f"[WARN] no evaluated results found, skip saving best checkpoint.") + + def load_checkpoint(self, checkpoint=None, model_only=False): + if checkpoint is None: + checkpoint_list = sorted(glob.glob(f'{self.ckpt_path}/{self.name}_ep*.pth')) + if checkpoint_list: + checkpoint = checkpoint_list[-1] + self.log(f"[INFO] Latest checkpoint is {checkpoint}") + else: + self.log("[WARN] No checkpoint found, model randomly initialized.") + return + + checkpoint_dict = torch.load(checkpoint, map_location=self.device) + + if 'model' not in checkpoint_dict: + self.model.load_state_dict(checkpoint_dict) + self.log("[INFO] loaded bare model.") + return + + missing_keys, unexpected_keys = self.model.load_state_dict(checkpoint_dict['model'], strict=False) + self.log("[INFO] loaded model.") + if len(missing_keys) > 0: + self.log(f"[WARN] missing keys: {missing_keys}") + if len(unexpected_keys) > 0: + self.log(f"[WARN] unexpected keys: {unexpected_keys}") + + if self.ema is not None and 'ema' in checkpoint_dict: + self.ema.load_state_dict(checkpoint_dict['ema']) + + + if 'mean_count' in checkpoint_dict: + self.model.mean_count = checkpoint_dict['mean_count'] + if 'mean_density' in checkpoint_dict: + self.model.mean_density = checkpoint_dict['mean_density'] + if 'mean_density_torso' in checkpoint_dict: + self.model.mean_density_torso = checkpoint_dict['mean_density_torso'] + + if model_only: + return + + self.stats = checkpoint_dict['stats'] + self.epoch = checkpoint_dict['epoch'] + self.global_step = checkpoint_dict['global_step'] + self.log(f"[INFO] load at epoch {self.epoch}, global step {self.global_step}") + + if self.optimizer and 'optimizer' in checkpoint_dict: + try: + self.optimizer.load_state_dict(checkpoint_dict['optimizer']) + self.log("[INFO] loaded optimizer.") + except: + self.log("[WARN] Failed to load optimizer.") + + if self.lr_scheduler and 'lr_scheduler' in checkpoint_dict: + try: + self.lr_scheduler.load_state_dict(checkpoint_dict['lr_scheduler']) + self.log("[INFO] loaded scheduler.") + except: + self.log("[WARN] Failed to load scheduler.") + + if self.scaler and 'scaler' in checkpoint_dict: + try: + self.scaler.load_state_dict(checkpoint_dict['scaler']) + self.log("[INFO] loaded scaler.") + except: + self.log("[WARN] Failed to load scaler.") \ No newline at end of file diff --git a/raymarching/__init__.py b/raymarching/__init__.py new file mode 100644 index 0000000..26d3cc6 --- /dev/null +++ b/raymarching/__init__.py @@ -0,0 +1 @@ +from .raymarching import * \ No newline at end of file diff --git a/raymarching/backend.py b/raymarching/backend.py new file mode 100644 index 0000000..2d41d14 --- /dev/null +++ b/raymarching/backend.py @@ -0,0 +1,40 @@ +import os +from torch.utils.cpp_extension import load + +_src_path = os.path.dirname(os.path.abspath(__file__)) + +nvcc_flags = [ + '-O3', '-std=c++14', + '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', +] + +if os.name == "posix": + c_flags = ['-O3', '-std=c++14'] +elif os.name == "nt": + c_flags = ['/O2', 
'/std:c++17'] + + # find cl.exe + def find_cl_path(): + import glob + for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: + paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) + if paths: + return paths[0] + + # If cl.exe is not on path, try to find it. + if os.system("where cl.exe >nul 2>nul") != 0: + cl_path = find_cl_path() + if cl_path is None: + raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") + os.environ["PATH"] += ";" + cl_path + +_backend = load(name='_raymarching_face', + extra_cflags=c_flags, + extra_cuda_cflags=nvcc_flags, + sources=[os.path.join(_src_path, 'src', f) for f in [ + 'raymarching.cu', + 'bindings.cpp', + ]], + ) + +__all__ = ['_backend'] \ No newline at end of file diff --git a/raymarching/raymarching.py b/raymarching/raymarching.py new file mode 100644 index 0000000..05fb1e6 --- /dev/null +++ b/raymarching/raymarching.py @@ -0,0 +1,671 @@ +import numpy as np +import time + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.cuda.amp import custom_bwd, custom_fwd + +try: + import _raymarching_face as _backend +except ImportError: + from .backend import _backend + +# ---------------------------------------- +# utils +# ---------------------------------------- + +class _near_far_from_aabb(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, aabb, min_near=0.2): + ''' near_far_from_aabb, CUDA implementation + Calculate rays' intersection time (near and far) with aabb + Args: + rays_o: float, [N, 3] + rays_d: float, [N, 3] + aabb: float, [6], (xmin, ymin, zmin, xmax, ymax, zmax) + min_near: float, scalar + Returns: + nears: float, [N] + fars: float, [N] + ''' + if not rays_o.is_cuda: rays_o = rays_o.cuda() + if not rays_d.is_cuda: rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # num rays + + nears = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) + fars = torch.empty(N, dtype=rays_o.dtype, device=rays_o.device) + + _backend.near_far_from_aabb(rays_o, rays_d, aabb, N, min_near, nears, fars) + + return nears, fars + +near_far_from_aabb = _near_far_from_aabb.apply + + +class _sph_from_ray(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, radius): + ''' sph_from_ray, CUDA implementation + get spherical coordinate on the background sphere from rays. + Assume rays_o are inside the Sphere(radius). + Args: + rays_o: [N, 3] + rays_d: [N, 3] + radius: scalar, float + Return: + coords: [N, 2], in [-1, 1], theta and phi on a sphere. (further-surface) + ''' + if not rays_o.is_cuda: rays_o = rays_o.cuda() + if not rays_d.is_cuda: rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + N = rays_o.shape[0] # num rays + + coords = torch.empty(N, 2, dtype=rays_o.dtype, device=rays_o.device) + + _backend.sph_from_ray(rays_o, rays_d, radius, N, coords) + + return coords + +sph_from_ray = _sph_from_ray.apply + + +class _morton3D(Function): + @staticmethod + def forward(ctx, coords): + ''' morton3D, CUDA implementation + Args: + coords: [N, 3], int32, in [0, 128) (for some reason there is no uint32 tensor in torch...) + TODO: check if the coord range is valid! 
(current 128 is safe) + Returns: + indices: [N], int32, in [0, 128^3) + + ''' + if not coords.is_cuda: coords = coords.cuda() + + N = coords.shape[0] + + indices = torch.empty(N, dtype=torch.int32, device=coords.device) + + _backend.morton3D(coords.int(), N, indices) + + return indices + +morton3D = _morton3D.apply + +class _morton3D_invert(Function): + @staticmethod + def forward(ctx, indices): + ''' morton3D_invert, CUDA implementation + Args: + indices: [N], int32, in [0, 128^3) + Returns: + coords: [N, 3], int32, in [0, 128) + + ''' + if not indices.is_cuda: indices = indices.cuda() + + N = indices.shape[0] + + coords = torch.empty(N, 3, dtype=torch.int32, device=indices.device) + + _backend.morton3D_invert(indices.int(), N, coords) + + return coords + +morton3D_invert = _morton3D_invert.apply + + +class _packbits(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, grid, thresh, bitfield=None): + ''' packbits, CUDA implementation + Pack up the density grid into a bit field to accelerate ray marching. + Args: + grid: float, [C, H * H * H], assume H % 2 == 0 + thresh: float, threshold + Returns: + bitfield: uint8, [C, H * H * H / 8] + ''' + if not grid.is_cuda: grid = grid.cuda() + grid = grid.contiguous() + + C = grid.shape[0] + H3 = grid.shape[1] + N = C * H3 // 8 + + if bitfield is None: + bitfield = torch.empty(N, dtype=torch.uint8, device=grid.device) + + _backend.packbits(grid, N, thresh, bitfield) + + return bitfield + +packbits = _packbits.apply + + +class _morton3D_dilation(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, grid): + ''' max pooling with morton coord, CUDA implementation + or maybe call it dilation... we don't support adjust kernel size. + Args: + grid: float, [C, H * H * H], assume H % 2 == 0 + Returns: + grid_dilate: float, [C, H * H * H], assume H % 2 == 0bitfield: uint8, [C, H * H * H / 8] + ''' + if not grid.is_cuda: grid = grid.cuda() + grid = grid.contiguous() + + C = grid.shape[0] + H3 = grid.shape[1] + H = int(np.cbrt(H3)) + grid_dilation = torch.empty_like(grid) + + _backend.morton3D_dilation(grid, C, H, grid_dilation) + + return grid_dilation + +morton3D_dilation = _morton3D_dilation.apply + +# ---------------------------------------- +# train functions +# ---------------------------------------- + +class _march_rays_train(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, rays_o, rays_d, bound, density_bitfield, C, H, nears, fars, step_counter=None, mean_count=-1, perturb=False, align=-1, force_all_rays=False, dt_gamma=0, max_steps=1024): + ''' march rays to generate points (forward only) + Args: + rays_o/d: float, [N, 3] + bound: float, scalar + density_bitfield: uint8: [CHHH // 8] + C: int + H: int + nears/fars: float, [N] + step_counter: int32, (2), used to count the actual number of generated points. + mean_count: int32, estimated mean steps to accelerate training. (but will randomly drop rays if the actual point count exceeded this threshold.) + perturb: bool + align: int, pad output so its size is dividable by align, set to -1 to disable. + force_all_rays: bool, ignore step_counter and mean_count, always calculate all rays. Useful if rendering the whole image, instead of some rays. + dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) + max_steps: int, max number of sampled points along each ray, also affect min_stepsize. 
+ Returns: + xyzs: float, [M, 3], all generated points' coords. (all rays concated, need to use `rays` to extract points belonging to each ray) + dirs: float, [M, 3], all generated points' view dirs. + deltas: float, [M, 2], first is delta_t, second is rays_t + rays: int32, [N, 3], all rays' (index, point_offset, point_count), e.g., xyzs[rays[i, 1]:rays[i, 1] + rays[i, 2]] --> points belonging to rays[i, 0] + ''' + + if not rays_o.is_cuda: rays_o = rays_o.cuda() + if not rays_d.is_cuda: rays_d = rays_d.cuda() + if not density_bitfield.is_cuda: density_bitfield = density_bitfield.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + density_bitfield = density_bitfield.contiguous() + + N = rays_o.shape[0] # num rays + M = N * max_steps # init max points number in total + + # running average based on previous epoch (mimic `measured_batch_size_before_compaction` in instant-ngp) + # It estimate the max points number to enable faster training, but will lead to random ignored rays if underestimated. + if not force_all_rays and mean_count > 0: + if align > 0: + mean_count += align - mean_count % align + M = mean_count + + xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + deltas = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) + rays = torch.empty(N, 3, dtype=torch.int32, device=rays_o.device) # id, offset, num_steps + + if step_counter is None: + step_counter = torch.zeros(2, dtype=torch.int32, device=rays_o.device) # point counter, ray counter + + if perturb: + noises = torch.rand(N, dtype=rays_o.dtype, device=rays_o.device) + else: + noises = torch.zeros(N, dtype=rays_o.dtype, device=rays_o.device) + + _backend.march_rays_train(rays_o, rays_d, density_bitfield, bound, dt_gamma, max_steps, N, C, H, M, nears, fars, xyzs, dirs, deltas, rays, step_counter, noises) # m is the actually used points number + + #print(step_counter, M) + + # only used at the first (few) epochs. + if force_all_rays or mean_count <= 0: + m = step_counter[0].item() # D2H copy + if align > 0: + m += align - m % align + xyzs = xyzs[:m] + dirs = dirs[:m] + deltas = deltas[:m] + + torch.cuda.empty_cache() + + ctx.save_for_backward(rays, deltas) + + return xyzs, dirs, deltas, rays + + # to support optimizing camera poses. + @staticmethod + @custom_bwd + def backward(ctx, grad_xyzs, grad_dirs, grad_deltas, grad_rays): + # grad_xyzs/dirs: [M, 3] + + rays, deltas = ctx.saved_tensors + + N = rays.shape[0] + M = grad_xyzs.shape[0] + + grad_rays_o = torch.zeros(N, 3, device=rays.device) + grad_rays_d = torch.zeros(N, 3, device=rays.device) + + _backend.march_rays_train_backward(grad_xyzs, grad_dirs, rays, deltas, N, M, grad_rays_o, grad_rays_d) + + return grad_rays_o, grad_rays_d, None, None, None, None, None, None, None, None, None, None, None, None, None + +march_rays_train = _march_rays_train.apply + + +class _composite_rays_train(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, sigmas, rgbs, ambient, deltas, rays, T_thresh=1e-4): + ''' composite rays' rgbs, according to the ray marching formula. + Args: + rgbs: float, [M, 3] + sigmas: float, [M,] + ambient: float, [M,] (after summing up the last dimension) + deltas: float, [M, 2] + rays: int32, [N, 3] + Returns: + weights_sum: float, [N,], the alpha channel + depth: float, [N, ], the Depth + image: float, [N, 3], the RGB channel (after multiplying alpha!) 
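+ ambient_sum: float, [N,], the accumulated ambient value per ray (returned between weights_sum and depth)
+ Illustrative call (shapes only): weights_sum, ambient_sum, depth, image = composite_rays_train(sigmas, rgbs, ambient, deltas, rays)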
+ ''' + + sigmas = sigmas.contiguous() + rgbs = rgbs.contiguous() + ambient = ambient.contiguous() + + M = sigmas.shape[0] + N = rays.shape[0] + + weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + ambient_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) + + _backend.composite_rays_train_forward(sigmas, rgbs, ambient, deltas, rays, M, N, T_thresh, weights_sum, ambient_sum, depth, image) + + ctx.save_for_backward(sigmas, rgbs, ambient, deltas, rays, weights_sum, ambient_sum, depth, image) + ctx.dims = [M, N, T_thresh] + + return weights_sum, ambient_sum, depth, image + + @staticmethod + @custom_bwd + def backward(ctx, grad_weights_sum, grad_ambient_sum, grad_depth, grad_image): + + # NOTE: grad_depth is not used now! It won't be propagated to sigmas. + + grad_weights_sum = grad_weights_sum.contiguous() + grad_ambient_sum = grad_ambient_sum.contiguous() + grad_image = grad_image.contiguous() + + sigmas, rgbs, ambient, deltas, rays, weights_sum, ambient_sum, depth, image = ctx.saved_tensors + M, N, T_thresh = ctx.dims + + grad_sigmas = torch.zeros_like(sigmas) + grad_rgbs = torch.zeros_like(rgbs) + grad_ambient = torch.zeros_like(ambient) + + _backend.composite_rays_train_backward(grad_weights_sum, grad_ambient_sum, grad_image, sigmas, rgbs, ambient, deltas, rays, weights_sum, ambient_sum, image, M, N, T_thresh, grad_sigmas, grad_rgbs, grad_ambient) + + return grad_sigmas, grad_rgbs, grad_ambient, None, None, None + + +composite_rays_train = _composite_rays_train.apply + +# ---------------------------------------- +# infer functions +# ---------------------------------------- + +class _march_rays(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, bound, density_bitfield, C, H, near, far, align=-1, perturb=False, dt_gamma=0, max_steps=1024): + ''' march rays to generate points (forward only, for inference) + Args: + n_alive: int, number of alive rays + n_step: int, how many steps we march + rays_alive: int, [N], the alive rays' IDs in N (N >= n_alive, but we only use first n_alive) + rays_t: float, [N], the alive rays' time, we only use the first n_alive. + rays_o/d: float, [N, 3] + bound: float, scalar + density_bitfield: uint8: [CHHH // 8] + C: int + H: int + nears/fars: float, [N] + align: int, pad output so its size is dividable by align, set to -1 to disable. + perturb: bool/int, int > 0 is used as the random seed. + dt_gamma: float, called cone_angle in instant-ngp, exponentially accelerate ray marching if > 0. (very significant effect, but generally lead to worse performance) + max_steps: int, max number of sampled points along each ray, also affect min_stepsize. + Returns: + xyzs: float, [n_alive * n_step, 3], all generated points' coords + dirs: float, [n_alive * n_step, 3], all generated points' view dirs. + deltas: float, [n_alive * n_step, 2], all generated points' deltas (here we record two deltas, the first is for RGB, the second for depth). 
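+ Illustrative call (positional args, shapes only): xyzs, dirs, deltas = march_rays(n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, bound, density_bitfield, C, H, nears, fars)
+ When align > 0, the number of returned points is padded up so it is divisible by align.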
+ ''' + + if not rays_o.is_cuda: rays_o = rays_o.cuda() + if not rays_d.is_cuda: rays_d = rays_d.cuda() + + rays_o = rays_o.contiguous().view(-1, 3) + rays_d = rays_d.contiguous().view(-1, 3) + + M = n_alive * n_step + + if align > 0: + M += align - (M % align) + + xyzs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + dirs = torch.zeros(M, 3, dtype=rays_o.dtype, device=rays_o.device) + deltas = torch.zeros(M, 2, dtype=rays_o.dtype, device=rays_o.device) # 2 vals, one for rgb, one for depth + + if perturb: + # torch.manual_seed(perturb) # test_gui uses spp index as seed + noises = torch.rand(n_alive, dtype=rays_o.dtype, device=rays_o.device) + else: + noises = torch.zeros(n_alive, dtype=rays_o.dtype, device=rays_o.device) + + _backend.march_rays(n_alive, n_step, rays_alive, rays_t, rays_o, rays_d, bound, dt_gamma, max_steps, C, H, density_bitfield, near, far, xyzs, dirs, deltas, noises) + + return xyzs, dirs, deltas + +march_rays = _march_rays.apply + + +class _composite_rays(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward(ctx, n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, weights_sum, depth, image, T_thresh=1e-2): + ''' composite rays' rgbs, according to the ray marching formula. (for inference) + Args: + n_alive: int, number of alive rays + n_step: int, how many steps we march + rays_alive: int, [n_alive], the alive rays' IDs in N (N >= n_alive) + rays_t: float, [N], the alive rays' time + sigmas: float, [n_alive * n_step,] + rgbs: float, [n_alive * n_step, 3] + deltas: float, [n_alive * n_step, 2], all generated points' deltas (here we record two deltas, the first is for RGB, the second for depth). + In-place Outputs: + weights_sum: float, [N,], the alpha channel + depth: float, [N,], the depth value + image: float, [N, 3], the RGB channel (after multiplying alpha!) + ''' + _backend.composite_rays(n_alive, n_step, T_thresh, rays_alive, rays_t, sigmas, rgbs, deltas, weights_sum, depth, image) + return tuple() + + +composite_rays = _composite_rays.apply + + +class _composite_rays_ambient(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward(ctx, n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, weights_sum, depth, image, ambient_sum, T_thresh=1e-2): + _backend.composite_rays_ambient(n_alive, n_step, T_thresh, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, weights_sum, depth, image, ambient_sum) + return tuple() + + +composite_rays_ambient = _composite_rays_ambient.apply + + + + + +# custom + +class _composite_rays_train_sigma(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, sigmas, rgbs, ambient, deltas, rays, T_thresh=1e-4): + ''' composite rays' rgbs, according to the ray marching formula. + Args: + rgbs: float, [M, 3] + sigmas: float, [M,] + ambient: float, [M,] (after summing up the last dimension) + deltas: float, [M, 2] + rays: int32, [N, 3] + Returns: + weights_sum: float, [N,], the alpha channel + depth: float, [N, ], the Depth + image: float, [N, 3], the RGB channel (after multiplying alpha!) 
+ ''' + + sigmas = sigmas.contiguous() + rgbs = rgbs.contiguous() + ambient = ambient.contiguous() + + M = sigmas.shape[0] + N = rays.shape[0] + + weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + ambient_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) + + _backend.composite_rays_train_sigma_forward(sigmas, rgbs, ambient, deltas, rays, M, N, T_thresh, weights_sum, ambient_sum, depth, image) + + ctx.save_for_backward(sigmas, rgbs, ambient, deltas, rays, weights_sum, ambient_sum, depth, image) + ctx.dims = [M, N, T_thresh] + + return weights_sum, ambient_sum, depth, image + + @staticmethod + @custom_bwd + def backward(ctx, grad_weights_sum, grad_ambient_sum, grad_depth, grad_image): + + # NOTE: grad_depth is not used now! It won't be propagated to sigmas. + + grad_weights_sum = grad_weights_sum.contiguous() + grad_ambient_sum = grad_ambient_sum.contiguous() + grad_image = grad_image.contiguous() + + sigmas, rgbs, ambient, deltas, rays, weights_sum, ambient_sum, depth, image = ctx.saved_tensors + M, N, T_thresh = ctx.dims + + grad_sigmas = torch.zeros_like(sigmas) + grad_rgbs = torch.zeros_like(rgbs) + grad_ambient = torch.zeros_like(ambient) + + _backend.composite_rays_train_sigma_backward(grad_weights_sum, grad_ambient_sum, grad_image, sigmas, rgbs, ambient, deltas, rays, weights_sum, ambient_sum, image, M, N, T_thresh, grad_sigmas, grad_rgbs, grad_ambient) + + return grad_sigmas, grad_rgbs, grad_ambient, None, None, None + + +composite_rays_train_sigma = _composite_rays_train_sigma.apply + + +class _composite_rays_ambient_sigma(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward(ctx, n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, weights_sum, depth, image, ambient_sum, T_thresh=1e-2): + _backend.composite_rays_ambient_sigma(n_alive, n_step, T_thresh, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, weights_sum, depth, image, ambient_sum) + return tuple() + + +composite_rays_ambient_sigma = _composite_rays_ambient_sigma.apply + + + +# uncertainty +class _composite_rays_train_uncertainty(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, sigmas, rgbs, ambient, uncertainty, deltas, rays, T_thresh=1e-4): + ''' composite rays' rgbs, according to the ray marching formula. + Args: + rgbs: float, [M, 3] + sigmas: float, [M,] + ambient: float, [M,] (after summing up the last dimension) + deltas: float, [M, 2] + rays: int32, [N, 3] + Returns: + weights_sum: float, [N,], the alpha channel + depth: float, [N, ], the Depth + image: float, [N, 3], the RGB channel (after multiplying alpha!) 
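+ ambient_sum: float, [N,], the accumulated ambient value per ray
+ uncertainty_sum: float, [N,], the accumulated uncertainty per ray
+ (full return order: weights_sum, ambient_sum, uncertainty_sum, depth, image)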
+ ''' + + sigmas = sigmas.contiguous() + rgbs = rgbs.contiguous() + ambient = ambient.contiguous() + uncertainty = uncertainty.contiguous() + + M = sigmas.shape[0] + N = rays.shape[0] + + weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + ambient_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + uncertainty_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) + + _backend.composite_rays_train_uncertainty_forward(sigmas, rgbs, ambient, uncertainty, deltas, rays, M, N, T_thresh, weights_sum, ambient_sum, uncertainty_sum, depth, image) + + ctx.save_for_backward(sigmas, rgbs, ambient, uncertainty, deltas, rays, weights_sum, ambient_sum, uncertainty_sum, depth, image) + ctx.dims = [M, N, T_thresh] + + return weights_sum, ambient_sum, uncertainty_sum, depth, image + + @staticmethod + @custom_bwd + def backward(ctx, grad_weights_sum, grad_ambient_sum, grad_uncertainty_sum, grad_depth, grad_image): + + # NOTE: grad_depth is not used now! It won't be propagated to sigmas. + + grad_weights_sum = grad_weights_sum.contiguous() + grad_ambient_sum = grad_ambient_sum.contiguous() + grad_uncertainty_sum = grad_uncertainty_sum.contiguous() + grad_image = grad_image.contiguous() + + sigmas, rgbs, ambient, uncertainty, deltas, rays, weights_sum, ambient_sum, uncertainty_sum, depth, image = ctx.saved_tensors + M, N, T_thresh = ctx.dims + + grad_sigmas = torch.zeros_like(sigmas) + grad_rgbs = torch.zeros_like(rgbs) + grad_ambient = torch.zeros_like(ambient) + grad_uncertainty = torch.zeros_like(uncertainty) + + _backend.composite_rays_train_uncertainty_backward(grad_weights_sum, grad_ambient_sum, grad_uncertainty_sum, grad_image, sigmas, rgbs, ambient, uncertainty, deltas, rays, weights_sum, ambient_sum, uncertainty_sum, image, M, N, T_thresh, grad_sigmas, grad_rgbs, grad_ambient, grad_uncertainty) + + return grad_sigmas, grad_rgbs, grad_ambient, grad_uncertainty, None, None, None + + +composite_rays_train_uncertainty = _composite_rays_train_uncertainty.apply + + +class _composite_rays_uncertainty(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward(ctx, n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, uncertainties, weights_sum, depth, image, ambient_sum, uncertainty_sum, T_thresh=1e-2): + _backend.composite_rays_uncertainty(n_alive, n_step, T_thresh, rays_alive, rays_t, sigmas, rgbs, deltas, ambients, uncertainties, weights_sum, depth, image, ambient_sum, uncertainty_sum) + return tuple() + + +composite_rays_uncertainty = _composite_rays_uncertainty.apply + + + +# triplane(eye) +class _composite_rays_train_triplane(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) + def forward(ctx, sigmas, rgbs, amb_aud, amb_eye, uncertainty, deltas, rays, T_thresh=1e-4): + ''' composite rays' rgbs, according to the ray marching formula. + Args: + rgbs: float, [M, 3] + sigmas: float, [M,] + ambient: float, [M,] (after summing up the last dimension) + deltas: float, [M, 2] + rays: int32, [N, 3] + Returns: + weights_sum: float, [N,], the alpha channel + depth: float, [N, ], the Depth + image: float, [N, 3], the RGB channel (after multiplying alpha!) 
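+ Note: this triplane variant takes amb_aud, amb_eye and uncertainty in place of the single ambient input,
+ and returns weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, depth, image (each [N], except image which is [N, 3]).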
+ ''' + + sigmas = sigmas.contiguous() + rgbs = rgbs.contiguous() + amb_aud = amb_aud.contiguous() + amb_eye = amb_eye.contiguous() + uncertainty = uncertainty.contiguous() + + M = sigmas.shape[0] + N = rays.shape[0] + + weights_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + amb_aud_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + amb_eye_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + uncertainty_sum = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + depth = torch.empty(N, dtype=sigmas.dtype, device=sigmas.device) + image = torch.empty(N, 3, dtype=sigmas.dtype, device=sigmas.device) + + _backend.composite_rays_train_triplane_forward(sigmas, rgbs, amb_aud, amb_eye, uncertainty, deltas, rays, M, N, T_thresh, weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, depth, image) + + ctx.save_for_backward(sigmas, rgbs, amb_aud, amb_eye, uncertainty, deltas, rays, weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, depth, image) + ctx.dims = [M, N, T_thresh] + + return weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, depth, image + + @staticmethod + @custom_bwd + def backward(ctx, grad_weights_sum, grad_amb_aud_sum, grad_amb_eye_sum, grad_uncertainty_sum, grad_depth, grad_image): + + # NOTE: grad_depth is not used now! It won't be propagated to sigmas. + + grad_weights_sum = grad_weights_sum.contiguous() + grad_amb_aud_sum = grad_amb_aud_sum.contiguous() + grad_amb_eye_sum = grad_amb_eye_sum.contiguous() + grad_uncertainty_sum = grad_uncertainty_sum.contiguous() + grad_image = grad_image.contiguous() + + sigmas, rgbs, amb_aud, amb_eye, uncertainty, deltas, rays, weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, depth, image = ctx.saved_tensors + M, N, T_thresh = ctx.dims + + grad_sigmas = torch.zeros_like(sigmas) + grad_rgbs = torch.zeros_like(rgbs) + grad_amb_aud = torch.zeros_like(amb_aud) + grad_amb_eye = torch.zeros_like(amb_eye) + grad_uncertainty = torch.zeros_like(uncertainty) + + _backend.composite_rays_train_triplane_backward(grad_weights_sum, grad_amb_aud_sum, grad_amb_eye_sum, grad_uncertainty_sum, grad_image, sigmas, rgbs, amb_aud, amb_eye, uncertainty, deltas, rays, weights_sum, amb_aud_sum, amb_eye_sum, uncertainty_sum, image, M, N, T_thresh, grad_sigmas, grad_rgbs, grad_amb_aud, grad_amb_eye, grad_uncertainty) + + return grad_sigmas, grad_rgbs, grad_amb_aud, grad_amb_eye, grad_uncertainty, None, None, None + + +composite_rays_train_triplane = _composite_rays_train_triplane.apply + + +class _composite_rays_triplane(Function): + @staticmethod + @custom_fwd(cast_inputs=torch.float32) # need to cast sigmas & rgbs to float + def forward(ctx, n_alive, n_step, rays_alive, rays_t, sigmas, rgbs, deltas, ambs_aud, ambs_eye, uncertainties, weights_sum, depth, image, amb_aud_sum, amb_eye_sum, uncertainty_sum, T_thresh=1e-2): + _backend.composite_rays_triplane(n_alive, n_step, T_thresh, rays_alive, rays_t, sigmas, rgbs, deltas, ambs_aud, ambs_eye, uncertainties, weights_sum, depth, image, amb_aud_sum, amb_eye_sum, uncertainty_sum) + return tuple() + + +composite_rays_triplane = _composite_rays_triplane.apply \ No newline at end of file diff --git a/raymarching/setup.py b/raymarching/setup.py new file mode 100644 index 0000000..c2fbd1b --- /dev/null +++ b/raymarching/setup.py @@ -0,0 +1,63 @@ +import os +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +_src_path = os.path.dirname(os.path.abspath(__file__)) + +nvcc_flags = [ + '-O3', '-std=c++14', + # '-lineinfo', # 
to debug illegal memory access + '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', +] + +if os.name == "posix": + c_flags = ['-O3', '-std=c++14'] +elif os.name == "nt": + c_flags = ['/O2', '/std:c++17'] + + # find cl.exe + def find_cl_path(): + import glob + for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: + paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) + if paths: + return paths[0] + + # If cl.exe is not on path, try to find it. + if os.system("where cl.exe >nul 2>nul") != 0: + cl_path = find_cl_path() + if cl_path is None: + raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") + os.environ["PATH"] += ";" + cl_path + +''' +Usage: + +python setup.py build_ext --inplace # build extensions locally, do not install (only can be used from the parent directory) + +python setup.py install # build extensions and install (copy) to PATH. +pip install . # ditto but better (e.g., dependency & metadata handling) + +python setup.py develop # build extensions and install (symbolic) to PATH. +pip install -e . # ditto but better (e.g., dependency & metadata handling) + +''' +setup( + name='raymarching_face', # package name, import this to use python API + ext_modules=[ + CUDAExtension( + name='_raymarching_face', # extension name, import this to use CUDA API + sources=[os.path.join(_src_path, 'src', f) for f in [ + 'raymarching.cu', + 'bindings.cpp', + ]], + extra_compile_args={ + 'cxx': c_flags, + 'nvcc': nvcc_flags, + } + ), + ], + cmdclass={ + 'build_ext': BuildExtension, + } +) \ No newline at end of file diff --git a/raymarching/src/bindings.cpp b/raymarching/src/bindings.cpp new file mode 100644 index 0000000..f8298bf --- /dev/null +++ b/raymarching/src/bindings.cpp @@ -0,0 +1,39 @@ +#include + +#include "raymarching.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + // utils + m.def("packbits", &packbits, "packbits (CUDA)"); + m.def("near_far_from_aabb", &near_far_from_aabb, "near_far_from_aabb (CUDA)"); + m.def("sph_from_ray", &sph_from_ray, "sph_from_ray (CUDA)"); + m.def("morton3D", &morton3D, "morton3D (CUDA)"); + m.def("morton3D_invert", &morton3D_invert, "morton3D_invert (CUDA)"); + m.def("morton3D_dilation", &morton3D_dilation, "morton3D_dilation (CUDA)"); + // train + m.def("march_rays_train", &march_rays_train, "march_rays_train (CUDA)"); + m.def("march_rays_train_backward", &march_rays_train_backward, "march_rays_train_backward (CUDA)"); + m.def("composite_rays_train_forward", &composite_rays_train_forward, "composite_rays_train_forward (CUDA)"); + m.def("composite_rays_train_backward", &composite_rays_train_backward, "composite_rays_train_backward (CUDA)"); + // infer + m.def("march_rays", &march_rays, "march rays (CUDA)"); + m.def("composite_rays", &composite_rays, "composite rays (CUDA)"); + m.def("composite_rays_ambient", &composite_rays_ambient, "composite rays with ambient (CUDA)"); + + // train + m.def("composite_rays_train_sigma_forward", &composite_rays_train_sigma_forward, "composite_rays_train_forward (CUDA)"); + m.def("composite_rays_train_sigma_backward", &composite_rays_train_sigma_backward, "composite_rays_train_backward (CUDA)"); + // infer + m.def("composite_rays_ambient_sigma", &composite_rays_ambient_sigma, "composite rays with ambient (CUDA)"); + + // uncertainty train + m.def("composite_rays_train_uncertainty_forward", &composite_rays_train_uncertainty_forward, 
"composite_rays_train_forward (CUDA)"); + m.def("composite_rays_train_uncertainty_backward", &composite_rays_train_uncertainty_backward, "composite_rays_train_backward (CUDA)"); + m.def("composite_rays_uncertainty", &composite_rays_uncertainty, "composite rays with ambient (CUDA)"); + + // triplane + m.def("composite_rays_train_triplane_forward", &composite_rays_train_triplane_forward, "composite_rays_train_forward (CUDA)"); + m.def("composite_rays_train_triplane_backward", &composite_rays_train_triplane_backward, "composite_rays_train_backward (CUDA)"); + m.def("composite_rays_triplane", &composite_rays_triplane, "composite rays with ambient (CUDA)"); + +} \ No newline at end of file diff --git a/raymarching/src/raymarching.cu b/raymarching/src/raymarching.cu new file mode 100644 index 0000000..d7788b7 --- /dev/null +++ b/raymarching/src/raymarching.cu @@ -0,0 +1,2258 @@ +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor") +#define CHECK_IS_INT(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, #x " must be an int tensor") +#define CHECK_IS_FLOATING(x) TORCH_CHECK(x.scalar_type() == at::ScalarType::Float || x.scalar_type() == at::ScalarType::Half || x.scalar_type() == at::ScalarType::Double, #x " must be a floating tensor") + + +inline constexpr __device__ float SQRT3() { return 1.7320508075688772f; } +inline constexpr __device__ float RSQRT3() { return 0.5773502691896258f; } +inline constexpr __device__ float PI() { return 3.141592653589793f; } +inline constexpr __device__ float RPI() { return 0.3183098861837907f; } + + +template +inline __host__ __device__ T div_round_up(T val, T divisor) { + return (val + divisor - 1) / divisor; +} + +inline __host__ __device__ float signf(const float x) { + return copysignf(1.0, x); +} + +inline __host__ __device__ float clamp(const float x, const float min, const float max) { + return fminf(max, fmaxf(min, x)); +} + +inline __host__ __device__ void swapf(float& a, float& b) { + float c = a; a = b; b = c; +} + +inline __device__ int mip_from_pos(const float x, const float y, const float z, const float max_cascade) { + const float mx = fmaxf(fabsf(x), fmaxf(fabs(y), fabs(z))); + int exponent; + frexpf(mx, &exponent); // [0, 0.5) --> -1, [0.5, 1) --> 0, [1, 2) --> 1, [2, 4) --> 2, ... 
+ return fminf(max_cascade - 1, fmaxf(0, exponent)); +} + +inline __device__ int mip_from_dt(const float dt, const float H, const float max_cascade) { + const float mx = dt * H * 0.5; + int exponent; + frexpf(mx, &exponent); + return fminf(max_cascade - 1, fmaxf(0, exponent)); +} + +inline __host__ __device__ uint32_t __expand_bits(uint32_t v) +{ + v = (v * 0x00010001u) & 0xFF0000FFu; + v = (v * 0x00000101u) & 0x0F00F00Fu; + v = (v * 0x00000011u) & 0xC30C30C3u; + v = (v * 0x00000005u) & 0x49249249u; + return v; +} + +inline __host__ __device__ uint32_t __morton3D(uint32_t x, uint32_t y, uint32_t z) +{ + uint32_t xx = __expand_bits(x); + uint32_t yy = __expand_bits(y); + uint32_t zz = __expand_bits(z); + return xx | (yy << 1) | (zz << 2); +} + +inline __host__ __device__ uint32_t __morton3D_invert(uint32_t x) +{ + x = x & 0x49249249; + x = (x | (x >> 2)) & 0xc30c30c3; + x = (x | (x >> 4)) & 0x0f00f00f; + x = (x | (x >> 8)) & 0xff0000ff; + x = (x | (x >> 16)) & 0x0000ffff; + return x; +} + + +//////////////////////////////////////////////////// +///////////// utils ///////////// +//////////////////////////////////////////////////// + +// rays_o/d: [N, 3] +// nears/fars: [N] +// scalar_t should always be float in use. +template +__global__ void kernel_near_far_from_aabb( + const scalar_t * __restrict__ rays_o, + const scalar_t * __restrict__ rays_d, + const scalar_t * __restrict__ aabb, + const uint32_t N, + const float min_near, + scalar_t * nears, scalar_t * fars +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + rays_o += n * 3; + rays_d += n * 3; + + const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2]; + const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2]; + const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz; + + // get near far (assume cube scene) + float near = (aabb[0] - ox) * rdx; + float far = (aabb[3] - ox) * rdx; + if (near > far) swapf(near, far); + + float near_y = (aabb[1] - oy) * rdy; + float far_y = (aabb[4] - oy) * rdy; + if (near_y > far_y) swapf(near_y, far_y); + + if (near > far_y || near_y > far) { + nears[n] = fars[n] = std::numeric_limits::max(); + return; + } + + if (near_y > near) near = near_y; + if (far_y < far) far = far_y; + + float near_z = (aabb[2] - oz) * rdz; + float far_z = (aabb[5] - oz) * rdz; + if (near_z > far_z) swapf(near_z, far_z); + + if (near > far_z || near_z > far) { + nears[n] = fars[n] = std::numeric_limits::max(); + return; + } + + if (near_z > near) near = near_z; + if (far_z < far) far = far_z; + + if (near < min_near) near = min_near; + + nears[n] = near; + fars[n] = far; +} + + +void near_far_from_aabb(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor aabb, const uint32_t N, const float min_near, at::Tensor nears, at::Tensor fars) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + rays_o.scalar_type(), "near_far_from_aabb", ([&] { + kernel_near_far_from_aabb<<>>(rays_o.data_ptr(), rays_d.data_ptr(), aabb.data_ptr(), N, min_near, nears.data_ptr(), fars.data_ptr()); + })); +} + + +// rays_o/d: [N, 3] +// radius: float +// coords: [N, 2] +template +__global__ void kernel_sph_from_ray( + const scalar_t * __restrict__ rays_o, + const scalar_t * __restrict__ rays_d, + const float radius, + const uint32_t N, + scalar_t * coords +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + rays_o += n * 3; + rays_d += n * 3; + coords += n * 2; 
+ + const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2]; + const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2]; + const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz; + + // solve t from || o + td || = radius + const float A = dx * dx + dy * dy + dz * dz; + const float B = ox * dx + oy * dy + oz * dz; // in fact B / 2 + const float C = ox * ox + oy * oy + oz * oz - radius * radius; + + const float t = (- B + sqrtf(B * B - A * C)) / A; // always use the larger solution (positive) + + // solve theta, phi (assume y is the up axis) + const float x = ox + t * dx, y = oy + t * dy, z = oz + t * dz; + const float theta = atan2(sqrtf(x * x + z * z), y); // [0, PI) + const float phi = atan2(z, x); // [-PI, PI) + + // normalize to [-1, 1] + coords[0] = 2 * theta * RPI() - 1; + coords[1] = phi * RPI(); +} + + +void sph_from_ray(const at::Tensor rays_o, const at::Tensor rays_d, const float radius, const uint32_t N, at::Tensor coords) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + rays_o.scalar_type(), "sph_from_ray", ([&] { + kernel_sph_from_ray<<>>(rays_o.data_ptr(), rays_d.data_ptr(), radius, N, coords.data_ptr()); + })); +} + + +// coords: int32, [N, 3] +// indices: int32, [N] +__global__ void kernel_morton3D( + const int * __restrict__ coords, + const uint32_t N, + int * indices +) { + // parallel + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + coords += n * 3; + indices[n] = __morton3D(coords[0], coords[1], coords[2]); +} + + +void morton3D(const at::Tensor coords, const uint32_t N, at::Tensor indices) { + static constexpr uint32_t N_THREAD = 128; + kernel_morton3D<<>>(coords.data_ptr(), N, indices.data_ptr()); +} + + +// indices: int32, [N] +// coords: int32, [N, 3] +__global__ void kernel_morton3D_invert( + const int * __restrict__ indices, + const uint32_t N, + int * coords +) { + // parallel + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + coords += n * 3; + + const int ind = indices[n]; + + coords[0] = __morton3D_invert(ind >> 0); + coords[1] = __morton3D_invert(ind >> 1); + coords[2] = __morton3D_invert(ind >> 2); +} + + +void morton3D_invert(const at::Tensor indices, const uint32_t N, at::Tensor coords) { + static constexpr uint32_t N_THREAD = 128; + kernel_morton3D_invert<<>>(indices.data_ptr(), N, coords.data_ptr()); +} + + +// grid: float, [C, H, H, H] +// N: int, C * H * H * H / 8 +// density_thresh: float +// bitfield: uint8, [N] +template +__global__ void kernel_packbits( + const scalar_t * __restrict__ grid, + const uint32_t N, + const float density_thresh, + uint8_t * bitfield +) { + // parallel per byte + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + grid += n * 8; + + uint8_t bits = 0; + + #pragma unroll + for (uint8_t i = 0; i < 8; i++) { + bits |= (grid[i] > density_thresh) ? 
((uint8_t)1 << i) : 0; + } + + bitfield[n] = bits; +} + + +void packbits(const at::Tensor grid, const uint32_t N, const float density_thresh, at::Tensor bitfield) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grid.scalar_type(), "packbits", ([&] { + kernel_packbits<<>>(grid.data_ptr(), N, density_thresh, bitfield.data_ptr()); + })); +} + + +// grid: float, [C, H, H, H] +__global__ void kernel_morton3D_dilation( + const float * __restrict__ grid, + const uint32_t C, + const uint32_t H, + float * __restrict__ grid_dilation +) { + // parallel per byte + const uint32_t H3 = H * H * H; + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= C * H3) return; + + // locate + const uint32_t c = n / H3; + const uint32_t ind = n - c * H3; + + const uint32_t x = __morton3D_invert(ind >> 0); + const uint32_t y = __morton3D_invert(ind >> 1); + const uint32_t z = __morton3D_invert(ind >> 2); + + // manual max pool + float res = grid[n]; + + if (x + 1 < H) res = fmaxf(res, grid[c * H3 + __morton3D(x + 1, y, z)]); + if (x > 0) res = fmaxf(res, grid[c * H3 + __morton3D(x - 1, y, z)]); + if (y + 1 < H) res = fmaxf(res, grid[c * H3 + __morton3D(x, y + 1, z)]); + if (y > 0) res = fmaxf(res, grid[c * H3 + __morton3D(x, y - 1, z)]); + if (z + 1 < H) res = fmaxf(res, grid[c * H3 + __morton3D(x, y, z + 1)]); + if (z > 0) res = fmaxf(res, grid[c * H3 + __morton3D(x, y, z - 1)]); + + // write + grid_dilation[n] = res; +} + +void morton3D_dilation(const at::Tensor grid, const uint32_t C, const uint32_t H, at::Tensor grid_dilation) { + static constexpr uint32_t N_THREAD = 128; + + kernel_morton3D_dilation<<>>(grid.data_ptr(), C, H, grid_dilation.data_ptr()); +} + +//////////////////////////////////////////////////// +///////////// training ///////////// +//////////////////////////////////////////////////// + +// rays_o/d: [N, 3] +// grid: [CHHH / 8] +// xyzs, dirs, deltas: [M, 3], [M, 3], [M, 2] +// dirs: [M, 3] +// rays: [N, 3], idx, offset, num_steps +template +__global__ void kernel_march_rays_train( + const scalar_t * __restrict__ rays_o, + const scalar_t * __restrict__ rays_d, + const uint8_t * __restrict__ grid, + const float bound, + const float dt_gamma, const uint32_t max_steps, + const uint32_t N, const uint32_t C, const uint32_t H, const uint32_t M, + const scalar_t* __restrict__ nears, + const scalar_t* __restrict__ fars, + scalar_t * xyzs, scalar_t * dirs, scalar_t * deltas, + int * rays, + int * counter, + const scalar_t* __restrict__ noises +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + rays_o += n * 3; + rays_d += n * 3; + + // ray marching + const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2]; + const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2]; + const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz; + const float rH = 1 / (float)H; + const float H3 = H * H * H; + + const float near = nears[n]; + const float far = fars[n]; + const float noise = noises[n]; + + const float dt_max = 2 * SQRT3() * (1 << (C - 1)) / H; + const float dt_min = fminf(dt_max, 2 * SQRT3() / max_steps); + + float t0 = near; + + // perturb + t0 += clamp(t0 * dt_gamma, dt_min, dt_max) * noise; + + // first pass: estimation of num_steps + float t = t0; + uint32_t num_steps = 0; + + //if (t < far) printf("valid ray %d t=%f near=%f far=%f \n", n, t, near, far); + + while (t < far && num_steps < max_steps) { + // current point + const float x = clamp(ox + t * dx, -bound, bound); + 
const float y = clamp(oy + t * dy, -bound, bound); + const float z = clamp(oz + t * dz, -bound, bound); + + const float dt = clamp(t * dt_gamma, dt_min, dt_max); + + // get mip level + const int level = max(mip_from_pos(x, y, z, C), mip_from_dt(dt, H, C)); // range in [0, C - 1] + + const float mip_bound = fminf(scalbnf(1.0f, level), bound); + const float mip_rbound = 1 / mip_bound; + + // convert to nearest grid position + const int nx = clamp(0.5 * (x * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + const int ny = clamp(0.5 * (y * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + const int nz = clamp(0.5 * (z * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + + const uint32_t index = level * H3 + __morton3D(nx, ny, nz); + const bool occ = grid[index / 8] & (1 << (index % 8)); + + // if occpuied, advance a small step, and write to output + //if (n == 0) printf("t=%f density=%f vs thresh=%f step=%d\n", t, density, density_thresh, num_steps); + + if (occ) { + num_steps++; + t += dt; + // else, skip a large step (basically skip a voxel grid) + } else { + // calc distance to next voxel + const float tx = (((nx + 0.5f + 0.5f * signf(dx)) * rH * 2 - 1) * mip_bound - x) * rdx; + const float ty = (((ny + 0.5f + 0.5f * signf(dy)) * rH * 2 - 1) * mip_bound - y) * rdy; + const float tz = (((nz + 0.5f + 0.5f * signf(dz)) * rH * 2 - 1) * mip_bound - z) * rdz; + + const float tt = t + fmaxf(0.0f, fminf(tx, fminf(ty, tz))); + // step until next voxel + do { + t += clamp(t * dt_gamma, dt_min, dt_max); + } while (t < tt); + } + } + + //printf("[n=%d] num_steps=%d, near=%f, far=%f, dt=%f, max_steps=%f\n", n, num_steps, near, far, dt_min, (far - near) / dt_min); + + // second pass: really locate and write points & dirs + uint32_t point_index = atomicAdd(counter, num_steps); + uint32_t ray_index = atomicAdd(counter + 1, 1); + + //printf("[n=%d] num_steps=%d, point_index=%d, ray_index=%d\n", n, num_steps, point_index, ray_index); + + // write rays + rays[ray_index * 3] = n; + rays[ray_index * 3 + 1] = point_index; + rays[ray_index * 3 + 2] = num_steps; + + if (num_steps == 0) return; + if (point_index + num_steps > M) return; + + xyzs += point_index * 3; + dirs += point_index * 3; + deltas += point_index * 2; + + t = t0; + uint32_t step = 0; + + while (t < far && step < num_steps) { + // current point + const float x = clamp(ox + t * dx, -bound, bound); + const float y = clamp(oy + t * dy, -bound, bound); + const float z = clamp(oz + t * dz, -bound, bound); + + const float dt = clamp(t * dt_gamma, dt_min, dt_max); + + // get mip level + const int level = max(mip_from_pos(x, y, z, C), mip_from_dt(dt, H, C)); // range in [0, C - 1] + + const float mip_bound = fminf(scalbnf(1.0f, level), bound); + const float mip_rbound = 1 / mip_bound; + + // convert to nearest grid position + const int nx = clamp(0.5 * (x * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + const int ny = clamp(0.5 * (y * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + const int nz = clamp(0.5 * (z * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + + // query grid + const uint32_t index = level * H3 + __morton3D(nx, ny, nz); + const bool occ = grid[index / 8] & (1 << (index % 8)); + + // if occpuied, advance a small step, and write to output + if (occ) { + // write step + xyzs[0] = x; + xyzs[1] = y; + xyzs[2] = z; + dirs[0] = dx; + dirs[1] = dy; + dirs[2] = dz; + t += dt; + deltas[0] = dt; + deltas[1] = t; // used to calc depth + xyzs += 3; + dirs += 3; + deltas += 2; + step++; + // else, skip a large step (basically skip a voxel grid) + } else { + // calc 
distance to next voxel + const float tx = (((nx + 0.5f + 0.5f * signf(dx)) * rH * 2 - 1) * mip_bound - x) * rdx; + const float ty = (((ny + 0.5f + 0.5f * signf(dy)) * rH * 2 - 1) * mip_bound - y) * rdy; + const float tz = (((nz + 0.5f + 0.5f * signf(dz)) * rH * 2 - 1) * mip_bound - z) * rdz; + const float tt = t + fmaxf(0.0f, fminf(tx, fminf(ty, tz))); + // step until next voxel + do { + t += clamp(t * dt_gamma, dt_min, dt_max); + } while (t < tt); + } + } +} + +void march_rays_train(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor grid, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t N, const uint32_t C, const uint32_t H, const uint32_t M, const at::Tensor nears, const at::Tensor fars, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor rays, at::Tensor counter, at::Tensor noises) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + rays_o.scalar_type(), "march_rays_train", ([&] { + kernel_march_rays_train<<>>(rays_o.data_ptr(), rays_d.data_ptr(), grid.data_ptr(), bound, dt_gamma, max_steps, N, C, H, M, nears.data_ptr(), fars.data_ptr(), xyzs.data_ptr(), dirs.data_ptr(), deltas.data_ptr(), rays.data_ptr(), counter.data_ptr(), noises.data_ptr()); + })); +} + + +// grad_xyzs/dirs: [M, 3] +// rays: [N, 3] +// deltas: [M, 2] +// grad_rays_o/d: [N, 3] +template +__global__ void kernel_march_rays_train_backward( + const scalar_t * __restrict__ grad_xyzs, + const scalar_t * __restrict__ grad_dirs, + const int * __restrict__ rays, + const scalar_t * __restrict__ deltas, + const uint32_t N, const uint32_t M, + scalar_t * grad_rays_o, + scalar_t * grad_rays_d +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + grad_rays_o += n * 3; + grad_rays_d += n * 3; + + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + // empty ray, or ray that exceed max step count. 
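+    // (offset + num_steps > M means the forward marching pass ran out of the
+    // preallocated point buffer for this ray, so there are no valid samples to
+    // backpropagate through.)
+    // The accumulation below is the chain rule for x = o + t*d and dir = d:
+    //   dL/do += dL/dx   and   dL/dd += t * dL/dx + dL/ddir,
+    // with the per-sample t read from deltas[1] (written by the forward pass).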
+ if (num_steps == 0 || offset + num_steps > M) return; + + grad_xyzs += offset * 3; + grad_dirs += offset * 3; + deltas += offset * 2; + + // accumulate + uint32_t step = 0; + while (step < num_steps) { + + grad_rays_o[0] += grad_xyzs[0]; + grad_rays_o[1] += grad_xyzs[1]; + grad_rays_o[2] += grad_xyzs[2]; + + grad_rays_d[0] += grad_xyzs[0] * deltas[1] + grad_dirs[0]; + grad_rays_d[1] += grad_xyzs[1] * deltas[1] + grad_dirs[1]; + grad_rays_d[2] += grad_xyzs[2] * deltas[1] + grad_dirs[2]; + + // locate + grad_xyzs += 3; + grad_dirs += 3; + deltas += 2; + + step++; + } +} + +void march_rays_train_backward(const at::Tensor grad_xyzs, const at::Tensor grad_dirs, const at::Tensor rays, const at::Tensor deltas, const uint32_t N, const uint32_t M, at::Tensor grad_rays_o, at::Tensor grad_rays_d) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_xyzs.scalar_type(), "march_rays_train_backward", ([&] { + kernel_march_rays_train_backward<<>>(grad_xyzs.data_ptr(), grad_dirs.data_ptr(), rays.data_ptr(), deltas.data_ptr(), N, M, grad_rays_o.data_ptr(), grad_rays_d.data_ptr()); + })); +} + + +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N], final pixel alpha +// depth: [N,] +// image: [N, 3] +template +__global__ void kernel_composite_rays_train_forward( + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ ambient, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * weights_sum, + scalar_t * ambient_sum, + scalar_t * depth, + scalar_t * image +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + // empty ray, or ray that exceed max step count. 
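+    // The compositing loop below implements standard volume rendering:
+    //   alpha_i = 1 - exp(-sigma_i * delta_i),  T_i = prod_{j<i} (1 - alpha_j),  w_i = alpha_i * T_i,
+    //   image = sum_i w_i * rgb_i,  depth = sum_i w_i * t_i,  weights_sum = sum_i w_i,
+    // and terminates early once the remaining transmittance T falls below T_thresh.
+    // In this variant the ambient term is accumulated unweighted (amb += ambient[0]).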
+ if (num_steps == 0 || offset + num_steps > M) { + weights_sum[index] = 0; + ambient_sum[index] = 0; + depth[index] = 0; + image[index * 3] = 0; + image[index * 3 + 1] = 0; + image[index * 3 + 2] = 0; + return; + } + + sigmas += offset; + rgbs += offset * 3; + ambient += offset; + deltas += offset * 2; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + scalar_t r = 0, g = 0, b = 0, ws = 0, d = 0, amb = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + + d += weight * deltas[1]; + + ws += weight; + + amb += ambient[0]; + + T *= 1.0f - alpha; + + // minimal remained transmittence + if (T < T_thresh) break; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // locate + sigmas++; + rgbs += 3; + ambient++; + deltas += 2; + + step++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // write + weights_sum[index] = ws; // weights_sum + ambient_sum[index] = amb; + depth[index] = d; + image[index * 3] = r; + image[index * 3 + 1] = g; + image[index * 3 + 2] = b; +} + + +void composite_rays_train_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor ambient_sum, at::Tensor depth, at::Tensor image) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + sigmas.scalar_type(), "composite_rays_train_forward", ([&] { + kernel_composite_rays_train_forward<<>>(sigmas.data_ptr(), rgbs.data_ptr(), ambient.data_ptr(), deltas.data_ptr(), rays.data_ptr(), M, N, T_thresh, weights_sum.data_ptr(), ambient_sum.data_ptr(), depth.data_ptr(), image.data_ptr()); + })); +} + + +// grad_weights_sum: [N,] +// grad: [N, 3] +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N,], weights_sum here +// image: [N, 3] +// grad_sigmas: [M] +// grad_rgbs: [M, 3] +template +__global__ void kernel_composite_rays_train_backward( + const scalar_t * __restrict__ grad_weights_sum, + const scalar_t * __restrict__ grad_ambient_sum, + const scalar_t * __restrict__ grad_image, + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ ambient, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const scalar_t * __restrict__ weights_sum, + const scalar_t * __restrict__ ambient_sum, + const scalar_t * __restrict__ image, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * grad_sigmas, + scalar_t * grad_rgbs, + scalar_t * grad_ambient +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + if (num_steps == 0 || offset + num_steps > M) return; + + grad_weights_sum += index; + grad_ambient_sum += index; + grad_image += index * 3; + weights_sum += index; + ambient_sum += index; + image += index * 3; + + sigmas += offset; + rgbs += offset * 3; + ambient += offset; + deltas += offset * 2; + + grad_sigmas += offset; + grad_rgbs += offset * 3; + grad_ambient += offset; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + const scalar_t 
r_final = image[0], g_final = image[1], b_final = image[2], ws_final = weights_sum[0]; + scalar_t r = 0, g = 0, b = 0, ws = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + // amb += weight * ambient[0]; + ws += weight; + + T *= 1.0f - alpha; + + // check https://note.kiui.moe/others/nerf_gradient/ for the gradient calculation. + // write grad_rgbs + grad_rgbs[0] = grad_image[0] * weight; + grad_rgbs[1] = grad_image[1] * weight; + grad_rgbs[2] = grad_image[2] * weight; + + // write grad_ambient + grad_ambient[0] = grad_ambient_sum[0]; + + // write grad_sigmas + grad_sigmas[0] = deltas[0] * ( + grad_image[0] * (T * rgbs[0] - (r_final - r)) + + grad_image[1] * (T * rgbs[1] - (g_final - g)) + + grad_image[2] * (T * rgbs[2] - (b_final - b)) + + // grad_ambient_sum[0] * (T * ambient[0] - (amb_final - amb)) + + grad_weights_sum[0] * (1 - ws_final) + ); + + //printf("[n=%d] num_steps=%d, T=%f, grad_sigmas=%f, r_final=%f, r=%f\n", n, step, T, grad_sigmas[0], r_final, r); + // minimal remained transmittence + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + // ambient++; + deltas += 2; + grad_sigmas++; + grad_rgbs += 3; + grad_ambient++; + + step++; + } +} + + +void composite_rays_train_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_ambient_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor ambient_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_ambient) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_image.scalar_type(), "composite_rays_train_backward", ([&] { + kernel_composite_rays_train_backward<<>>(grad_weights_sum.data_ptr(), grad_ambient_sum.data_ptr(), grad_image.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), ambient.data_ptr(), deltas.data_ptr(), rays.data_ptr(), weights_sum.data_ptr(), ambient_sum.data_ptr(), image.data_ptr(), M, N, T_thresh, grad_sigmas.data_ptr(), grad_rgbs.data_ptr(), grad_ambient.data_ptr()); + })); +} + + +//////////////////////////////////////////////////// +///////////// infernce ///////////// +//////////////////////////////////////////////////// + +template +__global__ void kernel_march_rays( + const uint32_t n_alive, + const uint32_t n_step, + const int* __restrict__ rays_alive, + const scalar_t* __restrict__ rays_t, + const scalar_t* __restrict__ rays_o, + const scalar_t* __restrict__ rays_d, + const float bound, + const float dt_gamma, const uint32_t max_steps, + const uint32_t C, const uint32_t H, + const uint8_t * __restrict__ grid, + const scalar_t* __restrict__ nears, + const scalar_t* __restrict__ fars, + scalar_t* xyzs, scalar_t* dirs, scalar_t* deltas, + const scalar_t* __restrict__ noises +) { + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= n_alive) return; + + const int index = rays_alive[n]; // ray id + const float noise = noises[n]; + + // locate + rays_o += index * 3; + rays_d += index * 3; + xyzs += n * n_step * 3; + dirs += n * n_step * 3; + deltas += n * n_step * 2; + + const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2]; + const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2]; + const float rdx = 1 / dx, rdy = 1 / 
dy, rdz = 1 / dz; + const float rH = 1 / (float)H; + const float H3 = H * H * H; + + float t = rays_t[index]; // current ray's t + const float near = nears[index], far = fars[index]; + + const float dt_max = 2 * SQRT3() * (1 << (C - 1)) / H; + const float dt_min = fminf(dt_max, 2 * SQRT3() / max_steps); + + // march for n_step steps, record points + uint32_t step = 0; + + // introduce some randomness + t += clamp(t * dt_gamma, dt_min, dt_max) * noise; + + while (t < far && step < n_step) { + // current point + const float x = clamp(ox + t * dx, -bound, bound); + const float y = clamp(oy + t * dy, -bound, bound); + const float z = clamp(oz + t * dz, -bound, bound); + + const float dt = clamp(t * dt_gamma, dt_min, dt_max); + + // get mip level + const int level = max(mip_from_pos(x, y, z, C), mip_from_dt(dt, H, C)); // range in [0, C - 1] + + const float mip_bound = fminf(scalbnf(1, level), bound); + const float mip_rbound = 1 / mip_bound; + + // convert to nearest grid position + const int nx = clamp(0.5 * (x * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + const int ny = clamp(0.5 * (y * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + const int nz = clamp(0.5 * (z * mip_rbound + 1) * H, 0.0f, (float)(H - 1)); + + const uint32_t index = level * H3 + __morton3D(nx, ny, nz); + const bool occ = grid[index / 8] & (1 << (index % 8)); + + // if occpuied, advance a small step, and write to output + if (occ) { + // write step + xyzs[0] = x; + xyzs[1] = y; + xyzs[2] = z; + dirs[0] = dx; + dirs[1] = dy; + dirs[2] = dz; + // calc dt + t += dt; + deltas[0] = dt; + deltas[1] = t; // used to calc depth + // step + xyzs += 3; + dirs += 3; + deltas += 2; + step++; + + // else, skip a large step (basically skip a voxel grid) + } else { + // calc distance to next voxel + const float tx = (((nx + 0.5f + 0.5f * signf(dx)) * rH * 2 - 1) * mip_bound - x) * rdx; + const float ty = (((ny + 0.5f + 0.5f * signf(dy)) * rH * 2 - 1) * mip_bound - y) * rdy; + const float tz = (((nz + 0.5f + 0.5f * signf(dz)) * rH * 2 - 1) * mip_bound - z) * rdz; + const float tt = t + fmaxf(0.0f, fminf(tx, fminf(ty, tz))); + // step until next voxel + do { + t += clamp(t * dt_gamma, dt_min, dt_max); + } while (t < tt); + } + } +} + + +void march_rays(const uint32_t n_alive, const uint32_t n_step, const at::Tensor rays_alive, const at::Tensor rays_t, const at::Tensor rays_o, const at::Tensor rays_d, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t C, const uint32_t H, const at::Tensor grid, const at::Tensor near, const at::Tensor far, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor noises) { + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + rays_o.scalar_type(), "march_rays", ([&] { + kernel_march_rays<<>>(n_alive, n_step, rays_alive.data_ptr(), rays_t.data_ptr(), rays_o.data_ptr(), rays_d.data_ptr(), bound, dt_gamma, max_steps, C, H, grid.data_ptr(), near.data_ptr(), far.data_ptr(), xyzs.data_ptr(), dirs.data_ptr(), deltas.data_ptr(), noises.data_ptr()); + })); +} + + +template +__global__ void kernel_composite_rays( + const uint32_t n_alive, + const uint32_t n_step, + const float T_thresh, + int* rays_alive, + scalar_t* rays_t, + const scalar_t* __restrict__ sigmas, + const scalar_t* __restrict__ rgbs, + const scalar_t* __restrict__ deltas, + scalar_t* weights_sum, scalar_t* depth, scalar_t* image +) { + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= n_alive) return; + + const int index = rays_alive[n]; // ray id + + // locate + 
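+    // Per-sample buffers (sigmas/rgbs/deltas) are indexed by the compacted slot n in the
+    // alive list, while per-ray accumulators are indexed by the original ray id. Since this
+    // kernel appears to be run repeatedly over batches of n_step samples, it resumes from
+    // the previously written weights_sum/depth/image and recovers transmittance as
+    // T = 1 - weight_sum rather than carrying it explicitly.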
sigmas += n * n_step; + rgbs += n * n_step * 3; + deltas += n * n_step * 2; + + rays_t += index; + weights_sum += index; + depth += index; + image += index * 3; + + scalar_t t = rays_t[0]; // current ray's t + + scalar_t weight_sum = weights_sum[0]; + scalar_t d = depth[0]; + scalar_t r = image[0]; + scalar_t g = image[1]; + scalar_t b = image[2]; + + // accumulate + uint32_t step = 0; + while (step < n_step) { + + // ray is terminated if delta == 0 + if (deltas[0] == 0) break; + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + + /* + T_0 = 1; T_i = \prod_{j=0}^{i-1} (1 - alpha_j) + w_i = alpha_i * T_i + --> + T_i = 1 - \sum_{j=0}^{i-1} w_j + */ + const scalar_t T = 1 - weight_sum; + const scalar_t weight = alpha * T; + weight_sum += weight; + + t = deltas[1]; + d += weight * t; + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // ray is terminated if T is too small + // use a larger bound to further accelerate inference + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + deltas += 2; + step++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // rays_alive = -1 means ray is terminated early. + if (step < n_step) { + rays_alive[n] = -1; + } else { + rays_t[0] = t; + } + + weights_sum[0] = weight_sum; // this is the thing I needed! + depth[0] = d; + image[0] = r; + image[1] = g; + image[2] = b; +} + + +void composite_rays(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor weights, at::Tensor depth, at::Tensor image) { + static constexpr uint32_t N_THREAD = 128; + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + image.scalar_type(), "composite_rays", ([&] { + kernel_composite_rays<<>>(n_alive, n_step, T_thresh, rays_alive.data_ptr(), rays_t.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), deltas.data_ptr(), weights.data_ptr(), depth.data_ptr(), image.data_ptr()); + })); +} + + + +template +__global__ void kernel_composite_rays_ambient( + const uint32_t n_alive, + const uint32_t n_step, + const float T_thresh, + int* rays_alive, + scalar_t* rays_t, + const scalar_t* __restrict__ sigmas, + const scalar_t* __restrict__ rgbs, + const scalar_t* __restrict__ deltas, + const scalar_t* __restrict__ ambients, + scalar_t* weights_sum, scalar_t* depth, scalar_t* image, scalar_t* ambient_sum +) { + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= n_alive) return; + + const int index = rays_alive[n]; // ray id + + // locate + sigmas += n * n_step; + rgbs += n * n_step * 3; + deltas += n * n_step * 2; + ambients += n * n_step; + + rays_t += index; + weights_sum += index; + depth += index; + image += index * 3; + ambient_sum += index; + + scalar_t t = rays_t[0]; // current ray's t + + scalar_t weight_sum = weights_sum[0]; + scalar_t d = depth[0]; + scalar_t r = image[0]; + scalar_t g = image[1]; + scalar_t b = image[2]; + scalar_t a = ambient_sum[0]; + + // accumulate + uint32_t step = 0; + while (step < n_step) { + + // ray is terminated if delta == 0 + if (deltas[0] == 0) break; + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + + /* + T_0 = 1; T_i = \prod_{j=0}^{i-1} (1 - alpha_j) + w_i = alpha_i * T_i + --> + T_i = 1 - \sum_{j=0}^{i-1} w_j + */ + const scalar_t T = 1 - weight_sum; + const scalar_t weight = alpha * T; + weight_sum += weight; + + t = 
deltas[1]; + d += weight * t; + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + a += ambients[0]; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // ray is terminated if T is too small + // use a larger bound to further accelerate inference + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + deltas += 2; + step++; + ambients++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // rays_alive = -1 means ray is terminated early. + if (step < n_step) { + rays_alive[n] = -1; + } else { + rays_t[0] = t; + } + + weights_sum[0] = weight_sum; // this is the thing I needed! + depth[0] = d; + image[0] = r; + image[1] = g; + image[2] = b; + ambient_sum[0] = a; +} + + +void composite_rays_ambient(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambients, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor ambient_sum) { + static constexpr uint32_t N_THREAD = 128; + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + image.scalar_type(), "composite_rays_ambient", ([&] { + kernel_composite_rays_ambient<<>>(n_alive, n_step, T_thresh, rays_alive.data_ptr(), rays_t.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), deltas.data_ptr(), ambients.data_ptr(), weights.data_ptr(), depth.data_ptr(), image.data_ptr(), ambient_sum.data_ptr()); + })); +} + + + + + + +// -------------------------------- sigma ambient ----------------------------- + +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N], final pixel alpha +// depth: [N,] +// image: [N, 3] +template +__global__ void kernel_composite_rays_train_sigma_forward( + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ ambient, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * weights_sum, + scalar_t * ambient_sum, + scalar_t * depth, + scalar_t * image +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + // empty ray, or ray that exceed max step count. 
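+    // This "sigma" variant matches composite_rays_train_forward except that the ambient
+    // term is alpha-weighted here (amb += weight * ambient[0]), so the matching backward
+    // kernel also routes the ambient gradient into grad_sigmas.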
+ if (num_steps == 0 || offset + num_steps > M) { + weights_sum[index] = 0; + ambient_sum[index] = 0; + depth[index] = 0; + image[index * 3] = 0; + image[index * 3 + 1] = 0; + image[index * 3 + 2] = 0; + return; + } + + sigmas += offset; + rgbs += offset * 3; + ambient += offset; + deltas += offset * 2; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + scalar_t r = 0, g = 0, b = 0, ws = 0, d = 0, amb = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + + d += weight * deltas[1]; + + ws += weight; + + amb += weight * ambient[0]; + + T *= 1.0f - alpha; + + // minimal remained transmittence + if (T < T_thresh) break; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // locate + sigmas++; + rgbs += 3; + ambient++; + deltas += 2; + + step++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // write + weights_sum[index] = ws; // weights_sum + ambient_sum[index] = amb; + depth[index] = d; + image[index * 3] = r; + image[index * 3 + 1] = g; + image[index * 3 + 2] = b; +} + + +void composite_rays_train_sigma_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor ambient_sum, at::Tensor depth, at::Tensor image) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + sigmas.scalar_type(), "composite_rays_train_sigma_forward", ([&] { + kernel_composite_rays_train_sigma_forward<<>>(sigmas.data_ptr(), rgbs.data_ptr(), ambient.data_ptr(), deltas.data_ptr(), rays.data_ptr(), M, N, T_thresh, weights_sum.data_ptr(), ambient_sum.data_ptr(), depth.data_ptr(), image.data_ptr()); + })); +} + + +// grad_weights_sum: [N,] +// grad: [N, 3] +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N,], weights_sum here +// image: [N, 3] +// grad_sigmas: [M] +// grad_rgbs: [M, 3] +template +__global__ void kernel_composite_rays_train_sigma_backward( + const scalar_t * __restrict__ grad_weights_sum, + const scalar_t * __restrict__ grad_ambient_sum, + const scalar_t * __restrict__ grad_image, + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ ambient, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const scalar_t * __restrict__ weights_sum, + const scalar_t * __restrict__ ambient_sum, + const scalar_t * __restrict__ image, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * grad_sigmas, + scalar_t * grad_rgbs, + scalar_t * grad_ambient +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + if (num_steps == 0 || offset + num_steps > M) return; + + grad_weights_sum += index; + grad_ambient_sum += index; + grad_image += index * 3; + weights_sum += index; + ambient_sum += index; + image += index * 3; + + sigmas += offset; + rgbs += offset * 3; + ambient += offset; + deltas += offset * 2; + + grad_sigmas += offset; + grad_rgbs += offset * 3; + grad_ambient += offset; + + // accumulate + uint32_t step = 0; + + scalar_t 
T = 1.0f; + const scalar_t r_final = image[0], g_final = image[1], b_final = image[2], ws_final = weights_sum[0], amb_final = ambient_sum[0]; + scalar_t r = 0, g = 0, b = 0, ws = 0, amb = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + amb += weight * ambient[0]; + ws += weight; + + T *= 1.0f - alpha; + + // check https://note.kiui.moe/others/nerf_gradient/ for the gradient calculation. + // write grad_rgbs + grad_rgbs[0] = grad_image[0] * weight; + grad_rgbs[1] = grad_image[1] * weight; + grad_rgbs[2] = grad_image[2] * weight; + + // write grad_ambient + grad_ambient[0] = grad_ambient_sum[0] * weight; + + // write grad_sigmas + grad_sigmas[0] = deltas[0] * ( + grad_image[0] * (T * rgbs[0] - (r_final - r)) + + grad_image[1] * (T * rgbs[1] - (g_final - g)) + + grad_image[2] * (T * rgbs[2] - (b_final - b)) + + grad_ambient_sum[0] * (T * ambient[0] - (amb_final - amb)) + + grad_weights_sum[0] * (1 - ws_final) + ); + + //printf("[n=%d] num_steps=%d, T=%f, grad_sigmas=%f, r_final=%f, r=%f\n", n, step, T, grad_sigmas[0], r_final, r); + // minimal remained transmittence + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + ambient++; + deltas += 2; + grad_sigmas++; + grad_rgbs += 3; + grad_ambient++; + + step++; + } +} + + +void composite_rays_train_sigma_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_ambient_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor ambient_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_ambient) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_image.scalar_type(), "composite_rays_train_sigma_backward", ([&] { + kernel_composite_rays_train_sigma_backward<<>>(grad_weights_sum.data_ptr(), grad_ambient_sum.data_ptr(), grad_image.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), ambient.data_ptr(), deltas.data_ptr(), rays.data_ptr(), weights_sum.data_ptr(), ambient_sum.data_ptr(), image.data_ptr(), M, N, T_thresh, grad_sigmas.data_ptr(), grad_rgbs.data_ptr(), grad_ambient.data_ptr()); + })); +} + + +//////////////////////////////////////////////////// +///////////// infernce ///////////// +//////////////////////////////////////////////////// + + +template +__global__ void kernel_composite_rays_ambient_sigma( + const uint32_t n_alive, + const uint32_t n_step, + const float T_thresh, + int* rays_alive, + scalar_t* rays_t, + const scalar_t* __restrict__ sigmas, + const scalar_t* __restrict__ rgbs, + const scalar_t* __restrict__ deltas, + const scalar_t* __restrict__ ambients, + scalar_t* weights_sum, scalar_t* depth, scalar_t* image, scalar_t* ambient_sum +) { + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= n_alive) return; + + const int index = rays_alive[n]; // ray id + + // locate + sigmas += n * n_step; + rgbs += n * n_step * 3; + deltas += n * n_step * 2; + ambients += n * n_step; + + rays_t += index; + weights_sum += index; + depth += index; + image += index * 3; + ambient_sum += index; + + scalar_t t = rays_t[0]; // current ray's t + + scalar_t weight_sum = weights_sum[0]; + scalar_t d = depth[0]; + scalar_t r = image[0]; + scalar_t g = image[1]; + scalar_t 
b = image[2]; + scalar_t a = ambient_sum[0]; + + // accumulate + uint32_t step = 0; + while (step < n_step) { + + // ray is terminated if delta == 0 + if (deltas[0] == 0) break; + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + + /* + T_0 = 1; T_i = \prod_{j=0}^{i-1} (1 - alpha_j) + w_i = alpha_i * T_i + --> + T_i = 1 - \sum_{j=0}^{i-1} w_j + */ + const scalar_t T = 1 - weight_sum; + const scalar_t weight = alpha * T; + weight_sum += weight; + + t = deltas[1]; + d += weight * t; + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + a += weight * ambients[0]; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // ray is terminated if T is too small + // use a larger bound to further accelerate inference + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + deltas += 2; + step++; + ambients++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // rays_alive = -1 means ray is terminated early. + if (step < n_step) { + rays_alive[n] = -1; + } else { + rays_t[0] = t; + } + + weights_sum[0] = weight_sum; // this is the thing I needed! + depth[0] = d; + image[0] = r; + image[1] = g; + image[2] = b; + ambient_sum[0] = a; +} + + +void composite_rays_ambient_sigma(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambients, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor ambient_sum) { + static constexpr uint32_t N_THREAD = 128; + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + image.scalar_type(), "composite_rays_ambient_sigma", ([&] { + kernel_composite_rays_ambient_sigma<<>>(n_alive, n_step, T_thresh, rays_alive.data_ptr(), rays_t.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), deltas.data_ptr(), ambients.data_ptr(), weights.data_ptr(), depth.data_ptr(), image.data_ptr(), ambient_sum.data_ptr()); + })); +} + + + + + + + +// -------------------------------- uncertainty ----------------------------- + +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N], final pixel alpha +// depth: [N,] +// image: [N, 3] +template +__global__ void kernel_composite_rays_train_uncertainty_forward( + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ ambient, + const scalar_t * __restrict__ uncertainty, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * weights_sum, + scalar_t * ambient_sum, + scalar_t * uncertainty_sum, + scalar_t * depth, + scalar_t * image +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + // empty ray, or ray that exceed max step count. 
+ if (num_steps == 0 || offset + num_steps > M) { + weights_sum[index] = 0; + ambient_sum[index] = 0; + uncertainty_sum[index] = 0; + depth[index] = 0; + image[index * 3] = 0; + image[index * 3 + 1] = 0; + image[index * 3 + 2] = 0; + return; + } + + sigmas += offset; + rgbs += offset * 3; + ambient += offset; + uncertainty += offset; + deltas += offset * 2; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + scalar_t r = 0, g = 0, b = 0, ws = 0, d = 0, amb = 0, unc = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + + d += weight * deltas[1]; + + ws += weight; + + amb += ambient[0]; + unc += weight * uncertainty[0]; + + T *= 1.0f - alpha; + + // minimal remained transmittence + if (T < T_thresh) break; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // locate + sigmas++; + rgbs += 3; + ambient++; + uncertainty++; + deltas += 2; + + step++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // write + weights_sum[index] = ws; // weights_sum + ambient_sum[index] = amb; + uncertainty_sum[index] = unc; + depth[index] = d; + image[index * 3] = r; + image[index * 3 + 1] = g; + image[index * 3 + 2] = b; +} + + +void composite_rays_train_uncertainty_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor ambient_sum, at::Tensor uncertainty_sum, at::Tensor depth, at::Tensor image) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + sigmas.scalar_type(), "composite_rays_train_uncertainty_forward", ([&] { + kernel_composite_rays_train_uncertainty_forward<<>>(sigmas.data_ptr(), rgbs.data_ptr(), ambient.data_ptr(), uncertainty.data_ptr(), deltas.data_ptr(), rays.data_ptr(), M, N, T_thresh, weights_sum.data_ptr(), ambient_sum.data_ptr(), uncertainty_sum.data_ptr(), depth.data_ptr(), image.data_ptr()); + })); +} + + +// grad_weights_sum: [N,] +// grad: [N, 3] +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N,], weights_sum here +// image: [N, 3] +// grad_sigmas: [M] +// grad_rgbs: [M, 3] +template +__global__ void kernel_composite_rays_train_uncertainty_backward( + const scalar_t * __restrict__ grad_weights_sum, + const scalar_t * __restrict__ grad_ambient_sum, + const scalar_t * __restrict__ grad_uncertainty_sum, + const scalar_t * __restrict__ grad_image, + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ ambient, + const scalar_t * __restrict__ uncertainty, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const scalar_t * __restrict__ weights_sum, + const scalar_t * __restrict__ ambient_sum, + const scalar_t * __restrict__ uncertainty_sum, + const scalar_t * __restrict__ image, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * grad_sigmas, + scalar_t * grad_rgbs, + scalar_t * grad_ambient, + scalar_t * grad_uncertainty +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = 
rays[n * 3 + 2]; + + if (num_steps == 0 || offset + num_steps > M) return; + + grad_weights_sum += index; + grad_ambient_sum += index; + grad_uncertainty_sum += index; + grad_image += index * 3; + weights_sum += index; + ambient_sum += index; + uncertainty_sum += index; + image += index * 3; + + sigmas += offset; + rgbs += offset * 3; + ambient += offset; + uncertainty += offset; + deltas += offset * 2; + + grad_sigmas += offset; + grad_rgbs += offset * 3; + grad_ambient += offset; + grad_uncertainty += offset; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + const scalar_t r_final = image[0], g_final = image[1], b_final = image[2], ws_final = weights_sum[0], amb_final = ambient_sum[0], unc_final = uncertainty_sum[0]; + scalar_t r = 0, g = 0, b = 0, ws = 0, amb = 0, unc = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + // amb += ambient[0]; + unc += weight * uncertainty[0]; + ws += weight; + + T *= 1.0f - alpha; + + // check https://note.kiui.moe/others/nerf_gradient/ for the gradient calculation. + // write grad_rgbs + grad_rgbs[0] = grad_image[0] * weight; + grad_rgbs[1] = grad_image[1] * weight; + grad_rgbs[2] = grad_image[2] * weight; + + // write grad_ambient + grad_ambient[0] = grad_ambient_sum[0]; + + // write grad_unc + grad_uncertainty[0] = grad_uncertainty_sum[0] * weight; + + // write grad_sigmas + grad_sigmas[0] = deltas[0] * ( + grad_image[0] * (T * rgbs[0] - (r_final - r)) + + grad_image[1] * (T * rgbs[1] - (g_final - g)) + + grad_image[2] * (T * rgbs[2] - (b_final - b)) + + // grad_ambient_sum[0] * (T * ambient[0] - (amb_final - amb)) + + grad_uncertainty_sum[0] * (T * uncertainty[0] - (unc_final - unc)) + + grad_weights_sum[0] * (1 - ws_final) + ); + + //printf("[n=%d] num_steps=%d, T=%f, grad_sigmas=%f, r_final=%f, r=%f\n", n, step, T, grad_sigmas[0], r_final, r); + // minimal remained transmittence + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + // ambient++; + uncertainty++; + deltas += 2; + grad_sigmas++; + grad_rgbs += 3; + grad_ambient++; + grad_uncertainty++; + + step++; + } +} + + +void composite_rays_train_uncertainty_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_ambient_sum, const at::Tensor grad_uncertainty_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor ambient_sum, const at::Tensor uncertainty_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_ambient, at::Tensor grad_uncertainty) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_image.scalar_type(), "composite_rays_train_uncertainty_backward", ([&] { + kernel_composite_rays_train_uncertainty_backward<<>>(grad_weights_sum.data_ptr(), grad_ambient_sum.data_ptr(), grad_uncertainty_sum.data_ptr(), grad_image.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), ambient.data_ptr(), uncertainty.data_ptr(), deltas.data_ptr(), rays.data_ptr(), weights_sum.data_ptr(), ambient_sum.data_ptr(), uncertainty_sum.data_ptr(), image.data_ptr(), M, N, T_thresh, grad_sigmas.data_ptr(), grad_rgbs.data_ptr(), grad_ambient.data_ptr(), grad_uncertainty.data_ptr()); + })); +} + + 
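+// For reference, the composite_rays_train_*_backward kernels in this file all rely on the
+// same identity: with
+//   alpha_k = 1 - exp(-sigma_k * delta_k),  T_k = prod_{j<k} (1 - alpha_j),  w_k = alpha_k * T_k,
+// the gradient of an accumulated quantity C = sum_i w_i * c_i with respect to sigma_k is
+//   dC/dsigma_k = delta_k * ( T_{k+1} * c_k - (C - sum_{i<=k} w_i * c_i) ),
+// and because T_{k+1} = 1 - sum_{j<=k} w_j, the weights_sum contribution collapses to
+//   dW/dsigma_k = delta_k * (1 - W),
+// which is the grad_weights_sum[0] * (1 - ws_final) term written inside those loops.
+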
+//////////////////////////////////////////////////// +///////////// infernce ///////////// +//////////////////////////////////////////////////// + + +template +__global__ void kernel_composite_rays_uncertainty( + const uint32_t n_alive, + const uint32_t n_step, + const float T_thresh, + int* rays_alive, + scalar_t* rays_t, + const scalar_t* __restrict__ sigmas, + const scalar_t* __restrict__ rgbs, + const scalar_t* __restrict__ deltas, + const scalar_t* __restrict__ ambients, + const scalar_t* __restrict__ uncertainties, + scalar_t* weights_sum, scalar_t* depth, scalar_t* image, scalar_t* ambient_sum, scalar_t* uncertainty_sum +) { + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= n_alive) return; + + const int index = rays_alive[n]; // ray id + + // locate + sigmas += n * n_step; + rgbs += n * n_step * 3; + deltas += n * n_step * 2; + ambients += n * n_step; + uncertainties += n * n_step; + + rays_t += index; + weights_sum += index; + depth += index; + image += index * 3; + ambient_sum += index; + uncertainty_sum += index; + + scalar_t t = rays_t[0]; // current ray's t + + scalar_t weight_sum = weights_sum[0]; + scalar_t d = depth[0]; + scalar_t r = image[0]; + scalar_t g = image[1]; + scalar_t b = image[2]; + scalar_t a = ambient_sum[0]; + scalar_t u = uncertainty_sum[0]; + + // accumulate + uint32_t step = 0; + while (step < n_step) { + + // ray is terminated if delta == 0 + if (deltas[0] == 0) break; + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + + /* + T_0 = 1; T_i = \prod_{j=0}^{i-1} (1 - alpha_j) + w_i = alpha_i * T_i + --> + T_i = 1 - \sum_{j=0}^{i-1} w_j + */ + const scalar_t T = 1 - weight_sum; + const scalar_t weight = alpha * T; + weight_sum += weight; + + t = deltas[1]; + d += weight * t; + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + a += ambients[0]; + u += weight * uncertainties[0]; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // ray is terminated if T is too small + // use a larger bound to further accelerate inference + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + deltas += 2; + step++; + ambients++; + uncertainties++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // rays_alive = -1 means ray is terminated early. + if (step < n_step) { + rays_alive[n] = -1; + } else { + rays_t[0] = t; + } + + weights_sum[0] = weight_sum; // this is the thing I needed! 
+ depth[0] = d; + image[0] = r; + image[1] = g; + image[2] = b; + ambient_sum[0] = a; + uncertainty_sum[0] = u; +} + + +void composite_rays_uncertainty(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambients, at::Tensor uncertainties, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor ambient_sum, at::Tensor uncertainty_sum) { + static constexpr uint32_t N_THREAD = 128; + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + image.scalar_type(), "composite_rays_uncertainty", ([&] { + kernel_composite_rays_uncertainty<<>>(n_alive, n_step, T_thresh, rays_alive.data_ptr(), rays_t.data_ptr(), sigmas.data_ptr(), rgbs.data_ptr(), deltas.data_ptr(), ambients.data_ptr(), uncertainties.data_ptr(), weights.data_ptr(), depth.data_ptr(), image.data_ptr(), ambient_sum.data_ptr(), uncertainty_sum.data_ptr()); + })); +} + + + + +// -------------------------------- triplane ----------------------------- + +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N], final pixel alpha +// depth: [N,] +// image: [N, 3] +template +__global__ void kernel_composite_rays_train_triplane_forward( + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ amb_aud, + const scalar_t * __restrict__ amb_eye, + const scalar_t * __restrict__ uncertainty, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const uint32_t M, const uint32_t N, const float T_thresh, + scalar_t * weights_sum, + scalar_t * amb_aud_sum, + scalar_t * amb_eye_sum, + scalar_t * uncertainty_sum, + scalar_t * depth, + scalar_t * image +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + // empty ray, or ray that exceed max step count. 
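+    // The triplane variant tracks two ambient channels (amb_aud and amb_eye), both
+    // accumulated unweighted as in the base forward kernel, plus an alpha-weighted
+    // uncertainty term; the compositing loop is otherwise unchanged.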
+ if (num_steps == 0 || offset + num_steps > M) { + weights_sum[index] = 0; + amb_aud_sum[index] = 0; + amb_eye_sum[index] = 0; + uncertainty_sum[index] = 0; + depth[index] = 0; + image[index * 3] = 0; + image[index * 3 + 1] = 0; + image[index * 3 + 2] = 0; + return; + } + + sigmas += offset; + rgbs += offset * 3; + amb_aud += offset; + amb_eye += offset; + uncertainty += offset; + deltas += offset * 2; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + scalar_t r = 0, g = 0, b = 0, ws = 0, d = 0, a_aud = 0, a_eye=0, unc = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + + d += weight * deltas[1]; + + ws += weight; + + a_aud += amb_aud[0]; + a_eye += amb_eye[0]; + unc += weight * uncertainty[0]; + + T *= 1.0f - alpha; + + // minimal remained transmittence + if (T < T_thresh) break; + + //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d); + + // locate + sigmas++; + rgbs += 3; + amb_aud++; + amb_eye++; + uncertainty++; + deltas += 2; + + step++; + } + + //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d); + + // write + weights_sum[index] = ws; // weights_sum + amb_aud_sum[index] = a_aud; + amb_eye_sum[index] = a_eye; + uncertainty_sum[index] = unc; + depth[index] = d; + image[index * 3] = r; + image[index * 3 + 1] = g; + image[index * 3 + 2] = b; +} + + +void composite_rays_train_triplane_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor amb_aud, const at::Tensor amb_eye, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor amb_aud_sum, at::Tensor amb_eye_sum, at::Tensor uncertainty_sum, at::Tensor depth, at::Tensor image) { + + static constexpr uint32_t N_THREAD = 128; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + sigmas.scalar_type(), "composite_rays_train_triplane_forward", ([&] { + kernel_composite_rays_train_triplane_forward<<>>(sigmas.data_ptr(), rgbs.data_ptr(), amb_aud.data_ptr(), amb_eye.data_ptr(), uncertainty.data_ptr(), deltas.data_ptr(), rays.data_ptr(), M, N, T_thresh, weights_sum.data_ptr(), amb_aud_sum.data_ptr(), amb_eye_sum.data_ptr(), uncertainty_sum.data_ptr(), depth.data_ptr(), image.data_ptr()); + })); +} + + +// grad_weights_sum: [N,] +// grad: [N, 3] +// sigmas: [M] +// rgbs: [M, 3] +// deltas: [M, 2] +// rays: [N, 3], idx, offset, num_steps +// weights_sum: [N,], weights_sum here +// image: [N, 3] +// grad_sigmas: [M] +// grad_rgbs: [M, 3] +template +__global__ void kernel_composite_rays_train_triplane_backward( + const scalar_t * __restrict__ grad_weights_sum, + const scalar_t * __restrict__ grad_amb_aud_sum, + const scalar_t * __restrict__ grad_amb_eye_sum, + const scalar_t * __restrict__ grad_uncertainty_sum, + const scalar_t * __restrict__ grad_image, + const scalar_t * __restrict__ sigmas, + const scalar_t * __restrict__ rgbs, + const scalar_t * __restrict__ amb_aud, + const scalar_t * __restrict__ amb_eye, + const scalar_t * __restrict__ uncertainty, + const scalar_t * __restrict__ deltas, + const int * __restrict__ rays, + const scalar_t * __restrict__ weights_sum, + const scalar_t * __restrict__ amb_aud_sum, + const scalar_t * __restrict__ amb_eye_sum, + const scalar_t * __restrict__ uncertainty_sum, + const scalar_t * __restrict__ image, + const uint32_t M, const uint32_t N, 
const float T_thresh, + scalar_t * grad_sigmas, + scalar_t * grad_rgbs, + scalar_t * grad_amb_aud, + scalar_t * grad_amb_eye, + scalar_t * grad_uncertainty +) { + // parallel per ray + const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x; + if (n >= N) return; + + // locate + uint32_t index = rays[n * 3]; + uint32_t offset = rays[n * 3 + 1]; + uint32_t num_steps = rays[n * 3 + 2]; + + if (num_steps == 0 || offset + num_steps > M) return; + + grad_weights_sum += index; + grad_amb_aud_sum += index; + grad_amb_eye_sum += index; + grad_uncertainty_sum += index; + grad_image += index * 3; + weights_sum += index; + amb_aud_sum += index; + amb_eye_sum += index; + uncertainty_sum += index; + image += index * 3; + + sigmas += offset; + rgbs += offset * 3; + amb_aud += offset; + amb_eye += offset; + uncertainty += offset; + deltas += offset * 2; + + grad_sigmas += offset; + grad_rgbs += offset * 3; + grad_amb_aud += offset; + grad_amb_eye += offset; + grad_uncertainty += offset; + + // accumulate + uint32_t step = 0; + + scalar_t T = 1.0f; + const scalar_t r_final = image[0], g_final = image[1], b_final = image[2], ws_final = weights_sum[0], unc_final = uncertainty_sum[0]; + scalar_t r = 0, g = 0, b = 0, ws = 0, amb = 0, unc = 0; + + while (step < num_steps) { + + const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]); + const scalar_t weight = alpha * T; + + r += weight * rgbs[0]; + g += weight * rgbs[1]; + b += weight * rgbs[2]; + // amb += ambient[0]; + unc += weight * uncertainty[0]; + ws += weight; + + T *= 1.0f - alpha; + + // check https://note.kiui.moe/others/nerf_gradient/ for the gradient calculation. + // write grad_rgbs + grad_rgbs[0] = grad_image[0] * weight; + grad_rgbs[1] = grad_image[1] * weight; + grad_rgbs[2] = grad_image[2] * weight; + + // write grad_ambient + grad_amb_aud[0] = grad_amb_aud_sum[0]; + grad_amb_eye[0] = grad_amb_eye_sum[0]; + + // write grad_unc + grad_uncertainty[0] = grad_uncertainty_sum[0] * weight; + + // write grad_sigmas + grad_sigmas[0] = deltas[0] * ( + grad_image[0] * (T * rgbs[0] - (r_final - r)) + + grad_image[1] * (T * rgbs[1] - (g_final - g)) + + grad_image[2] * (T * rgbs[2] - (b_final - b)) + + // grad_ambient_sum[0] * (T * ambient[0] - (amb_final - amb)) + + grad_uncertainty_sum[0] * (T * uncertainty[0] - (unc_final - unc)) + + grad_weights_sum[0] * (1 - ws_final) + ); + + //printf("[n=%d] num_steps=%d, T=%f, grad_sigmas=%f, r_final=%f, r=%f\n", n, step, T, grad_sigmas[0], r_final, r); + // minimal remained transmittence + if (T < T_thresh) break; + + // locate + sigmas++; + rgbs += 3; + // ambient++; + uncertainty++; + deltas += 2; + grad_sigmas++; + grad_rgbs += 3; + grad_amb_aud++; + grad_amb_eye++; + grad_uncertainty++; + + step++; + } +} + + +void composite_rays_train_triplane_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_amb_aud_sum, const at::Tensor grad_amb_eye_sum, const at::Tensor grad_uncertainty_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor amb_aud, const at::Tensor amb_eye, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor amb_aud_sum, const at::Tensor amb_eye_sum, const at::Tensor uncertainty_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_amb_aud, at::Tensor grad_amb_eye, at::Tensor grad_uncertainty) { + + static constexpr uint32_t N_THREAD = 128; + + 
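+    // AT_DISPATCH_FLOATING_TYPES_AND_HALF instantiates the templated kernel for the
+    // tensor's floating dtype (float / double / half), binding scalar_t inside the lambda;
+    // the kernel runs with N_THREAD threads per block and one thread per ray
+    // (the n >= N guard handles the remainder).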
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+    grad_image.scalar_type(), "composite_rays_train_triplane_backward", ([&] {
+        kernel_composite_rays_train_triplane_backward<scalar_t><<<div_round_up(N, N_THREAD), N_THREAD>>>(grad_weights_sum.data_ptr<scalar_t>(), grad_amb_aud_sum.data_ptr<scalar_t>(), grad_amb_eye_sum.data_ptr<scalar_t>(), grad_uncertainty_sum.data_ptr<scalar_t>(), grad_image.data_ptr<scalar_t>(), sigmas.data_ptr<scalar_t>(), rgbs.data_ptr<scalar_t>(), amb_aud.data_ptr<scalar_t>(), amb_eye.data_ptr<scalar_t>(), uncertainty.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), rays.data_ptr<int>(), weights_sum.data_ptr<scalar_t>(), amb_aud_sum.data_ptr<scalar_t>(), amb_eye_sum.data_ptr<scalar_t>(), uncertainty_sum.data_ptr<scalar_t>(), image.data_ptr<scalar_t>(), M, N, T_thresh, grad_sigmas.data_ptr<scalar_t>(), grad_rgbs.data_ptr<scalar_t>(), grad_amb_aud.data_ptr<scalar_t>(), grad_amb_eye.data_ptr<scalar_t>(), grad_uncertainty.data_ptr<scalar_t>());
+    }));
+}
+
+
+////////////////////////////////////////////////////
+/////////////         inference        /////////////
+////////////////////////////////////////////////////
+
+
+template <typename scalar_t>
+__global__ void kernel_composite_rays_triplane(
+    const uint32_t n_alive,
+    const uint32_t n_step,
+    const float T_thresh,
+    int* rays_alive,
+    scalar_t* rays_t,
+    const scalar_t* __restrict__ sigmas,
+    const scalar_t* __restrict__ rgbs,
+    const scalar_t* __restrict__ deltas,
+    const scalar_t* __restrict__ ambs_aud,
+    const scalar_t* __restrict__ ambs_eye,
+    const scalar_t* __restrict__ uncertainties,
+    scalar_t* weights_sum, scalar_t* depth, scalar_t* image, scalar_t* amb_aud_sum, scalar_t* amb_eye_sum, scalar_t* uncertainty_sum
+) {
+    const uint32_t n = threadIdx.x + blockIdx.x * blockDim.x;
+    if (n >= n_alive) return;
+
+    const int index = rays_alive[n]; // ray id
+
+    // locate
+    sigmas += n * n_step;
+    rgbs += n * n_step * 3;
+    deltas += n * n_step * 2;
+    ambs_aud += n * n_step;
+    ambs_eye += n * n_step;
+    uncertainties += n * n_step;
+
+    rays_t += index;
+    weights_sum += index;
+    depth += index;
+    image += index * 3;
+    amb_aud_sum += index;
+    amb_eye_sum += index;
+    uncertainty_sum += index;
+
+    scalar_t t = rays_t[0]; // current ray's t
+
+    scalar_t weight_sum = weights_sum[0];
+    scalar_t d = depth[0];
+    scalar_t r = image[0];
+    scalar_t g = image[1];
+    scalar_t b = image[2];
+    scalar_t a_aud = amb_aud_sum[0];
+    scalar_t a_eye = amb_eye_sum[0];
+    scalar_t u = uncertainty_sum[0];
+
+    // accumulate
+    uint32_t step = 0;
+    while (step < n_step) {
+
+        // ray is terminated if delta == 0
+        if (deltas[0] == 0) break;
+
+        const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]);
+
+        /*
+        T_0 = 1; T_i = \prod_{j=0}^{i-1} (1 - alpha_j)
+        w_i = alpha_i * T_i
+        -->
+        T_i = 1 - \sum_{j=0}^{i-1} w_j
+        */
+        const scalar_t T = 1 - weight_sum;
+        const scalar_t weight = alpha * T;
+        weight_sum += weight;
+
+        t = deltas[1];
+        d += weight * t;
+        r += weight * rgbs[0];
+        g += weight * rgbs[1];
+        b += weight * rgbs[2];
+        a_aud += ambs_aud[0];
+        a_eye += ambs_eye[0];
+        u += weight * uncertainties[0];
+
+        //printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d);
+
+        // ray is terminated if T is too small
+        // use a larger bound to further accelerate inference
+        if (T < T_thresh) break;
+
+        // locate
+        sigmas++;
+        rgbs += 3;
+        deltas += 2;
+        step++;
+        ambs_aud++;
+        ambs_eye++;
+        uncertainties++;
+    }
+
+    //printf("[n=%d] rgb=(%f, %f, %f), d=%f\n", n, r, g, b, d);
+
+    // rays_alive = -1 means ray is terminated early.
+    if (step < n_step) {
+        rays_alive[n] = -1;
+    } else {
+        rays_t[0] = t;
+    }
+
+    weights_sum[0] = weight_sum; // this is the thing I needed!
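+    // The write-backs here and below persist the per-ray accumulators; on the next
+    // march/composite call, transmittance is recovered as T = 1 - weights_sum, so a
+    // still-alive ray resumes compositing exactly where it stopped.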
+    depth[0] = d;
+    image[0] = r;
+    image[1] = g;
+    image[2] = b;
+    amb_aud_sum[0] = a_aud;
+    amb_eye_sum[0] = a_eye;
+    uncertainty_sum[0] = u;
+}
+
+
+void composite_rays_triplane(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambs_aud, at::Tensor ambs_eye, at::Tensor uncertainties, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor amb_aud_sum, at::Tensor amb_eye_sum, at::Tensor uncertainty_sum) {
+    static constexpr uint32_t N_THREAD = 128;
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+    image.scalar_type(), "composite_rays_triplane", ([&] {
+        kernel_composite_rays_triplane<scalar_t><<<div_round_up(n_alive, N_THREAD), N_THREAD>>>(n_alive, n_step, T_thresh, rays_alive.data_ptr<int>(), rays_t.data_ptr<scalar_t>(), sigmas.data_ptr<scalar_t>(), rgbs.data_ptr<scalar_t>(), deltas.data_ptr<scalar_t>(), ambs_aud.data_ptr<scalar_t>(), ambs_eye.data_ptr<scalar_t>(), uncertainties.data_ptr<scalar_t>(), weights.data_ptr<scalar_t>(), depth.data_ptr<scalar_t>(), image.data_ptr<scalar_t>(), amb_aud_sum.data_ptr<scalar_t>(), amb_eye_sum.data_ptr<scalar_t>(), uncertainty_sum.data_ptr<scalar_t>());
+    }));
+}
diff --git a/raymarching/src/raymarching.h b/raymarching/src/raymarching.h
new file mode 100644
index 0000000..cd08969
--- /dev/null
+++ b/raymarching/src/raymarching.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <stdint.h>
+#include <torch/torch.h>
+
+
+void near_far_from_aabb(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor aabb, const uint32_t N, const float min_near, at::Tensor nears, at::Tensor fars);
+void sph_from_ray(const at::Tensor rays_o, const at::Tensor rays_d, const float radius, const uint32_t N, at::Tensor coords);
+void morton3D(const at::Tensor coords, const uint32_t N, at::Tensor indices);
+void morton3D_invert(const at::Tensor indices, const uint32_t N, at::Tensor coords);
+void packbits(const at::Tensor grid, const uint32_t N, const float density_thresh, at::Tensor bitfield);
+void morton3D_dilation(const at::Tensor grid, const uint32_t C, const uint32_t H, at::Tensor grid_dilation);
+
+void march_rays_train(const at::Tensor rays_o, const at::Tensor rays_d, const at::Tensor grid, const float bound, const float dt_gamma, const uint32_t max_steps, const uint32_t N, const uint32_t C, const uint32_t H, const uint32_t M, const at::Tensor nears, const at::Tensor fars, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor rays, at::Tensor counter, at::Tensor noises);
+void march_rays_train_backward(const at::Tensor grad_xyzs, const at::Tensor grad_dirs, const at::Tensor rays, const at::Tensor deltas, const uint32_t N, const uint32_t M, at::Tensor grad_rays_o, at::Tensor grad_rays_d);
+void composite_rays_train_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor ambient_sum, at::Tensor depth, at::Tensor image);
+void composite_rays_train_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_ambient_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor ambient_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_ambient);
+
+void march_rays(const uint32_t n_alive, const uint32_t n_step, const at::Tensor rays_alive, const at::Tensor rays_t, const at::Tensor rays_o, const at::Tensor rays_d, const float 
dt_gamma, const uint32_t max_steps, const uint32_t C, const uint32_t H, const at::Tensor grid, const at::Tensor nears, const at::Tensor fars, at::Tensor xyzs, at::Tensor dirs, at::Tensor deltas, at::Tensor noises); +void composite_rays(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor weights_sum, at::Tensor depth, at::Tensor image); +void composite_rays_ambient(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambients, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor ambient_sum); + + +void composite_rays_train_sigma_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor ambient_sum, at::Tensor depth, at::Tensor image); +void composite_rays_train_sigma_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_ambient_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor ambient_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_ambient); + +void composite_rays_ambient_sigma(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambients, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor ambient_sum); + + +// uncertainty +void composite_rays_train_uncertainty_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor ambient_sum, at::Tensor uncertainty_sum, at::Tensor depth, at::Tensor image); +void composite_rays_train_uncertainty_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_ambient_sum, const at::Tensor grad_uncertainty_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor ambient, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor ambient_sum, const at::Tensor uncertainty_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_ambient, at::Tensor grad_uncertainty); +void composite_rays_uncertainty(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambients, at::Tensor uncertainties, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor ambient_sum, at::Tensor uncertainty_sum); + +// triplane +void composite_rays_train_triplane_forward(const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor amb_aud, const at::Tensor amb_eye, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor weights_sum, at::Tensor amb_aud_sum, 
at::Tensor amb_eye_sum, at::Tensor uncertainty_sum, at::Tensor depth, at::Tensor image);
+void composite_rays_train_triplane_backward(const at::Tensor grad_weights_sum, const at::Tensor grad_amb_aud_sum, const at::Tensor grad_amb_eye_sum, const at::Tensor grad_uncertainty_sum, const at::Tensor grad_image, const at::Tensor sigmas, const at::Tensor rgbs, const at::Tensor amb_aud, const at::Tensor amb_eye, const at::Tensor uncertainty, const at::Tensor deltas, const at::Tensor rays, const at::Tensor weights_sum, const at::Tensor amb_aud_sum, const at::Tensor amb_eye_sum, const at::Tensor uncertainty_sum, const at::Tensor image, const uint32_t M, const uint32_t N, const float T_thresh, at::Tensor grad_sigmas, at::Tensor grad_rgbs, at::Tensor grad_amb_aud, at::Tensor grad_amb_eye, at::Tensor grad_uncertainty);
+void composite_rays_triplane(const uint32_t n_alive, const uint32_t n_step, const float T_thresh, at::Tensor rays_alive, at::Tensor rays_t, at::Tensor sigmas, at::Tensor rgbs, at::Tensor deltas, at::Tensor ambs_aud, at::Tensor ambs_eye, at::Tensor uncertainties, at::Tensor weights, at::Tensor depth, at::Tensor image, at::Tensor amb_aud_sum, at::Tensor amb_eye_sum, at::Tensor uncertainty_sum);
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..482e514
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,140 @@
+# ER-NeRF
+
+This is the official repo for our ICCV2023 paper **Efficient Region-Aware Neural Radiance Fields for High-Fidelity Talking Portrait Synthesis**.
+
+![image](assets/main.png)
+
+## Install
+
+Tested on Ubuntu 18.04, PyTorch 1.12 and CUDA 11.3.
+
+### Install dependencies
+
+```bash
+conda install pytorch==1.12.1 cudatoolkit=11.3 -c pytorch
+pip install -r requirements.txt
+pip install "git+https://github.com/facebookresearch/pytorch3d.git"
+pip install tensorflow-gpu==2.8.0
+```
+
+### Preparation
+
+- Prepare the face-parsing model.
+
+  ```bash
+  wget https://github.com/YudongGuo/AD-NeRF/blob/master/data_util/face_parsing/79999_iter.pth?raw=true -O data_utils/face_parsing/79999_iter.pth
+  ```
+
+- Prepare the 3DMM model for head pose estimation.
+
+  ```bash
+  wget https://github.com/YudongGuo/AD-NeRF/blob/master/data_util/face_tracking/3DMM/exp_info.npy?raw=true -O data_utils/face_tracking/3DMM/exp_info.npy
+  wget https://github.com/YudongGuo/AD-NeRF/blob/master/data_util/face_tracking/3DMM/keys_info.npy?raw=true -O data_utils/face_tracking/3DMM/keys_info.npy
+  wget https://github.com/YudongGuo/AD-NeRF/blob/master/data_util/face_tracking/3DMM/sub_mesh.obj?raw=true -O data_utils/face_tracking/3DMM/sub_mesh.obj
+  wget https://github.com/YudongGuo/AD-NeRF/blob/master/data_util/face_tracking/3DMM/topology_info.npy?raw=true -O data_utils/face_tracking/3DMM/topology_info.npy
+  ```
+
+- Download the 3DMM model from [Basel Face Model 2009](https://faces.dmi.unibas.ch/bfm/main.php?nav=1-1-0&id=details):
+
+  ```
+  cp 01_MorphableModel.mat data_utils/face_tracking/3DMM/
+  cd data_utils/face_tracking
+  python convert_BFM.py
+  ```
+
+## Datasets and pretrained models
+
+We obtain the experiment videos mainly from [DFRF](https://github.com/sstzal/DFRF) and YouTube. Due to copyright restrictions, we cannot distribute them; you can download and crop these videos yourself. Here is an example training video (Obama) from AD-NeRF at a resolution of 450x450:
+
+```
+mkdir -p data/obama
+wget https://github.com/YudongGuo/AD-NeRF/blob/master/dataset/vids/Obama.mp4?raw=true -O data/obama/obama.mp4
+```
+
+We also provide pretrained checkpoints for the Obama video clip. After completing the data pre-processing step, you can download and test them by:
+
+```bash
+python main.py data/obama/ --workspace trial_obama/ -O --test --ckpt trial_obama/checkpoints/ngp.pth # head
+python main.py data/obama/ --workspace trial_obama/ -O --test --torso --ckpt trial_obama_torso/checkpoints/ngp.pth # head+torso
+```
+
+The test results should be about:
+
+| setting    | PSNR   | LPIPS  | LMD   |
+| ---------- | ------ | ------ | ----- |
+| head       | 35.607 | 0.0178 | 2.525 |
+| head+torso | 26.594 | 0.0446 | 2.550 |
+
+## Usage
+
+### Pre-processing Custom Training Video
+
+* Put the training video under `data/<ID>/<name>.mp4`.
+
+  The video **must be 25FPS, with all frames containing the talking person**.
+  The resolution should be about 512x512, and the duration about 1-5 min.
+
+* Run the script to process the video. (may take several hours)
+
+  ```bash
+  python data_utils/process.py data/<ID>/<name>.mp4
+  ```
+
+### Audio Pre-process
+
+In our paper, we use DeepSpeech features for evaluation:
+
+```bash
+python data_utils/deepspeech_features/extract_ds_features.py --input data/<name>.wav # save to data/<name>.npy
+```
+
+You can also try to extract audio features via Wav2Vec like [RAD-NeRF](https://github.com/ashawkey/RAD-NeRF) by:
+
+```bash
+python nerf_triplane/asr.py --wav data/<name>.wav --save_feats # save to data/<name>_eo.npy
+```
+
+### Train
+
+The first run will take some time to compile the CUDA extensions.
+
+```bash
+# train (head and lpips finetune)
+python main.py data/obama/ --workspace trial_obama/ -O --iters 100000
+python main.py data/obama/ --workspace trial_obama/ -O --iters 125000 --finetune_lips --patch_size 32
+
+# train (torso)
+# <head>.pth should be the latest checkpoint in trial_obama
+python main.py data/obama/ --workspace trial_obama_torso/ -O --torso --head_ckpt <head>.pth --iters 200000
+```
+
+### Test
+
+```bash
+# test on the test split
+python main.py data/obama/ --workspace trial_obama/ -O --test # only render the head and use GT image for torso
+python main.py data/obama/ --workspace trial_obama_torso/ -O --torso --test # render both head and torso
+```
+
+### Inference with target audio
+
+```bash
+python main.py data/obama/ --workspace trial_obama_torso/ -O --torso --test --test_train --aud data/