From d830b8085e15a764c5655dd69818a824c2420608 Mon Sep 17 00:00:00 2001 From: MrPowers Date: Wed, 26 Aug 2020 18:46:40 -0500 Subject: [PATCH 1/2] Add Parquet predicate pushdown filtering example --- data/pets_parquet/_common_metadata | Bin 0 -> 1290 bytes data/pets_parquet/_metadata | Bin 0 -> 2135 bytes data/pets_parquet/part.0.parquet | Bin 0 -> 1822 bytes data/pets_parquet/part.1.parquet | Bin 0 -> 1800 bytes data/pets_parquet/part.2.parquet | Bin 0 -> 1800 bytes data/pets_parquet/part.3.parquet | Bin 0 -> 1800 bytes .../05-predicate-pushdown-filtering.ipynb | 381 ++++++++++++++++++ 7 files changed, 381 insertions(+) create mode 100644 data/pets_parquet/_common_metadata create mode 100644 data/pets_parquet/_metadata create mode 100644 data/pets_parquet/part.0.parquet create mode 100644 data/pets_parquet/part.1.parquet create mode 100644 data/pets_parquet/part.2.parquet create mode 100644 data/pets_parquet/part.3.parquet create mode 100644 dataframes/05-predicate-pushdown-filtering.ipynb diff --git a/data/pets_parquet/_common_metadata b/data/pets_parquet/_common_metadata new file mode 100644 index 0000000000000000000000000000000000000000..f9851f238ca5cf0dfc380527fdced9b9b1696729 GIT binary patch literal 1290 zcmZ`(+iu!G5Vg`M>O)?sh|~vUDQ|>g0;!1Fhc&^*G>`-mjH#-~mklhj7uOdIiTWS? 
z!Tv_onKjL|UCEc3Gc#vr_SolSpcJCNf!tVZ7@hxZWEw=-@FP*-Z zB&o0s&NudyO7En--Mw;gC$za|$HiY;_cG%iU0u5iKMi;+AD6yvpp+6|+=cl|FUC%jJ9p-4(0YvEDM;b{=AAAp53r*4A&fF`zQCpK{K)c1ds zOOKq;%4@of^92**$x;0^&x89Vjp#n2cCv_o`Cj&!6WP%kI}O)%6fH6=wxm(a7971w zrCL#B*q*#$eugQCNyS|<7@V2M`7srL?tH*iowf)e7jWVFLP(vcQ=$O4 zLudN|?`8m_4NFuJxIsup0DvbJQLBhyAknYjp?)+1Kb`2K`nc9#I-ODC@E$v3sxuyD zQzO{;{ATC`1E1=nJ7d0>no2b`THU!*TPcp&$bE+~a{?`y42>heQrqlD8yjN-+Z@Fc zt(jV;;$saXXoOZR$!!K)T%7^O%vG9II#%--KEgW7iGIaSTZB1$l(-%L#_RamhG%I7 zp>G{t22&G^~_a#1AWkS_X)4YA@@IfdcZQvY}{Jo{!^~d;TN@jOXNar zC8MJ!dxbfACsx(KUINAUbPe(cGwc=i9XlPx!yc@|t`GjD%V16&n3?V~H_+BJpIb&v zZS4llLs5rMe3$n_r&fo42Us!hh-JKJogVy^Rc3UMogSH&sg&DAwH zW9<%JQDfv1Z$cy|J(M08Dq%u2SBMCG;ET{rhDn`VKfSMWQ+OOil~t?CVXa>-GwX$s%qsgz{Fn#8!x-+ zz3M5)oO;Z;e@2fv^psOC{U3Vhp>M_x>@Flq2E+KhH}ChG-^?51Gu-2o(bNwW#Ko7& zOf#hXQ3v?ak!Xme?rn0^Y7S>B!>Mn0$1*@XztBjNwK|>K6v*@dKYDx-0KMY?vAA5 z7fX_58Ma%J_HcyDw8i`kNlYk&OOSmgjqK8{cMly%*(a&=#gYue;9`@>WDiFuX^Z(8 zk`4kLPzcMsi;q_`tls{Q(cgUeSg|EL!VJ~WV2^z&B1khZcjzNux!^lbGdlM%VH()m6p0}Hr#Q)eFQO2 z61$7gCSy%W94yC;sy7mOgJ87HiZd#@fvp3VCCq?hrtsB*HWY#})_~xw#?1@;xQ2Dv z8n{Z!n5Zqo4|!H2(=kS+^B^t`3L~*MQ=4_7!BfSn&`9Sh6TAFnbRibkHpHW z(Xe})%aN_~Vz)+f0h{-=!L7N%I;zh{g*MS*@`l=MKz?sVw8FfXa*J1qhEb_wfPbav zuud&lv!*i}>W38Cg62-B`hGKOcxmX50SU>5P{o{FECz@`pS8+V-TuWlr zqw6}adob>*Z58M)lOLtofIV(p!!4;X{F408B6)s7N)HSH>$GStNQBl0xuJFAlcZKq zKh!4#MS22;FdG*yNYO#wl+-JXqjh(_1*tchBrO09>XX9IA=IXmYHsU+hVj#7r6=VIQCKox4AqOWGHCjxX`0Gd{(0Ul76(LZ;r- zI{+D29;$X|hdkkr`Qsuw!I{dQl9vYn3mp1_PZG{lB6;v7Dg2tq@<*8(M^YK#(6#jU zuIiXPNvG1kB;Z%*7Y_HrGeyYC@ks(zP!JSlzWmS=_JT(u7Uc7-+;2)ad_aW52j4V# z`HU43>~#DSm*u%_#|P>xZuUbW{UK5KD_Qt0nf(`upP40@`KCsM)JmV5j>>a{KYEdT z+RO`bmW6hB4vbhF7M00nivP(P3;*wA=|7X{6f_F*n2=0SNcpO3s4L-*Exw%*jL8)G}@3<~bxTcoS@V78F91XP6k{ zhK|2^1UzkF>H*pvGf)jRP@#q!+V+pd+*3_h|EJ})#=T0G8=O`@t@CK!FT=D_ z`T5LO6S(+mUlR@VcuuxTc%dL*Sj;ah-6S*Fa7172V9jE4xD?zMVP|KC(P-Cu( z^|CpVVjp*idDfHGwRP5@mgWuvy=_m8wjFKjtjCUTk58^*Tp5Dpjl>lW;(4LfVumhQ+KGu~TdD)L5m>L)CZJF#`Q=WCTJ%tNj7 
zK{YyCG%cKIXJ3`csC%a>^^om#(G0=ww&RxBQ^t$llHnQ4cW{dwqwny-Nb>d^sRxQg zEk>GaL>NEthVdrCpmxk3W|Lx)o`WLICzWd?9mLI$ui(dcclDH{DYq~zP8#!*rC}~i zXHB)9_`qQL<_!_xEn5B%n!zjG_jB73j5?YbKCPx~SA%VPD_#opZi(jz8dn_C=oK3w+@eHtv9s3?an# zecXZ|;0b@gA5;+IvwXI2LSAOTEM&k1JkoG+sZ=Ii{xwzL_w#j*Wb?wFXB+Q5&9!)v z<8r^G;8Xsty!SXbpCnBapx}5+1Y40W?$!ZD@+ID2J*`C8`-BL4SS+p_dHIYLfNhB7Ir`%DYz{I_)?WS7Rway6bK{Qisdb{;RtX#uL? zIWQtQj3-~nSN@CPmH*$ya=)i@9MlQ>SA^s%LN?GmQ(FpurSD6&XIi({hVMkKw=BIb z{_ta^SW1r946~kC+3~)F5&`$pC%Kd$?8den^ZGd?A-oRP0Sk&C$~P^H@gg@^KLEZy zw~P?&t`%yg7HYuZMUL}xrMTsU>3>eQF+O3kz3}AtbDjtDQySL&!qP&20nGO$$JQ6x zVvU^!Yi+UcV@$TP7E9ar&?+8Q55*FcSK2T?!bHS~uuuE_bM%9KuNbINRz^azj9*m}UbF~E3+#j-HGLupI_fE|-aRm9MdDp#;c-*j9j znkd8LQLVSq+rv=z&g^qr(no`M+Hp5FrxECG-?5b8U1#o3Rk6D4G`n-Xb}Q;?J+U1v zZQYf_$)IxrSfr`F#paB@uBHx`6S)zMRnfs59k(8gYhiM>fywt$V}JXgfD%+le%p!eOT-64zKJ% z>_61>fW>3A{(d~_?Q#XWcUkK-SuErhV{5omE6mY+JFa%H7guylMTYqP410yTmwH|(hAXeFWNxvg%4@@I9I-s!#4)^Hpx2I}A8MZsj^opL%f_B| z=2e@Fx_6q|h}c}`tqA;XJ6@H&WxN@R4DVRJgGbaDeTO$DlDB6lJvd0zW}>-5gy{nx zm~LVWYA5<(HYpM5862e9q;`d(gSaW`75td)F1JZqvVvhr(nOyu4Rc{X*kcxVsSxg`+s;#uo6dTxtB+y`>T1EaBFtv>fHV%n; z-(!#c8vPbM?qNT`UiN$J%rgl|nmw%4TG-ETelzo%8UJifX;0)iKF8;dVBz)%NfAOc zO~Xakb)N8h{9Xw$KFz0dhvaz*%t8uWz#|DKmq?_N#gB;`zniUbB%Kv@JllBZX|BbS z443&M0l(s}#hr)2**IyE00qZmB3SZ#{~w*ttc7om<}wdH#eI5^U@I zJC)A-y!FmaFj`;dh@UxAS;Lj&e{9 z&w&xiVLaJfw)8EASNy(>W&TWNIH(iW9|_5pgtV`DrnVIRN!}D}&$O;CjJXrK-m>sA z|MvG%z7TIO7-l`Pvh7U)B?9h+Te%bJ9d%L>F2iV zgkxn;9ag$4y)_7Q@7O-EC4JbBCT(|PbLzhC_8dzYT(@WQi7J+t?M7#&SFS`|t;M#3 zrLDViFz&Yx0Sh&?yV&f}*VWX)axB-wkt#ZvqwUuGQ6-4?HgI{;6gZ}aST9>cDfaOO zm}fOs&h4WHwRLY07%k`0Y&p?}XEkztXLN8DS8cD%o-$qxMTTcAU&AeGjK0AO6Upmilpbs(YBSNCBf|87 zH%vDX2DM`SFq;&M^cXhMbX++{(Lvk<^$LDWcW2uqOVd)Z x)hi;vTeNr|TEPp$_w$=qDKGBV_QeKU&0)$O$%Q3H>Lt?_KVB@o(5TX{Qg)v^}`kQ-T zq0cQNK)Y)NnyCdEaCo8P{9MZKIAQu<(rt}Tm~1aNu71gLZ+=R{x?fsauvh}~ZNahi zrMBE)r~XD;E*B9dTU*POz3|W~9+nTq0+d(SGC#sZ#E7s@d%ZLDb*wDmQ~Fyx%F_m= z^a4&izNfTGr74vuozljX*(B)^9-*;$k9Q(~aSsQ}!tgew5d{ENOd^#LLr1Dy!Xkaw 
zcAapn464IQcdfSuf$p8!XSSpd`_Z)RZf#E8*WI3DDTAB#d@)tU@~YkF%=OB(sH?Tu zcC@l}R}RMg_Ay|grgoRxJ^H$uI#`Y6dN@)=2XnODnm?)p@!ke5PnrS8%n<8kYbeD& z-T?Eg$I7LB(xA5P4FaR(Ow5)OZF$xs*LOxo7cs65$|JQmGnFRx5Sr@sY65*&o6ZAX z*@M`BsOcVyMr!TjXxM$orQ2Y!kXwwb!Gl_1j^_JOxsAQJqGKvD#P?>{E7ZNx zTcU|QjE*`E*w=;)b85lN6vsAPd1J+Mi!4>%7;gQDTzi7==Y>xbE-Sfr=0k!It{C5jH>rl?o&W4gQ8C27hEhQ&!^eX=ynh3#2U%1#dq yrf=U80p6nJPoWjOGJHS3eU\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nicknameage
0fofo3
1tio1
2lulu9
\n", + "" + ], + "text/plain": [ + " nickname age\n", + "0 fofo 3\n", + "1 tio 1\n", + "2 lulu 9" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dd.read_parquet('../data/pets_parquet/part.0.parquet').head(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inefficient approach" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read in all of the Parquet files into a DataFrame and perform a filtering operation to grab all the pets that are older than 10" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = dd.read_parquet('../data/pets_parquet/*')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = df1[df1['age'] > 10]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nicknameage
1lll12
\n", + "
" + ], + "text/plain": [ + " nickname age\n", + "1 lll 12" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.head(1, npartitions=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.npartitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This approach sends all four Parquet files to the cluster. We know all four files are getting sent to Dask because four partitions are created.\n", + "\n", + "Dask needs to filter over all the files, even in the files we know don't have any pets older than 10. Let's use predicate pushdown filtering so we don't needlessly filter files that don't contain any matching data." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Efficient approach" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = dd.read_parquet('../data/pets_parquet/*', filters=[('age', '>', 10)])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nickname
age
7fff
\n", + "
" + ], + "text/plain": [ + " nickname\n", + "age \n", + "7 fff" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.head(1, npartitions=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.npartitions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This approach performs the Parquet predicate pushdown filtering. We can tell because the DataFrame only has one partition, so Dask only read one file. When the `filters` parameter is populated, Dask will intelligently inspect the metadata of the Parquet files and skip entire files whenever possible." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 6622df980ed1280127d27c1d80c78b78bdfe2959 Mon Sep 17 00:00:00 2001 From: MrPowers Date: Thu, 27 Aug 2020 12:47:26 -0500 Subject: [PATCH 2/2] Use CSV files because they're nicer in git repos --- data/pets/pets1.csv | 4 + data/pets/pets2.csv | 3 + data/pets/pets3.csv | 3 + data/pets/pets4.csv | 3 + data/pets_parquet/_common_metadata | Bin 1290 -> 0 bytes data/pets_parquet/_metadata | Bin 2135 -> 0 bytes data/pets_parquet/part.0.parquet | Bin 1822 -> 0 bytes data/pets_parquet/part.1.parquet | Bin 1800 -> 0 bytes data/pets_parquet/part.2.parquet | Bin 1800 -> 0 bytes data/pets_parquet/part.3.parquet | Bin 1800 -> 0 bytes .../05-predicate-pushdown-filtering.ipynb | 76 ++++++++++-------- 11 files changed, 54 
insertions(+), 35 deletions(-) create mode 100644 data/pets/pets1.csv create mode 100644 data/pets/pets2.csv create mode 100644 data/pets/pets3.csv create mode 100644 data/pets/pets4.csv delete mode 100644 data/pets_parquet/_common_metadata delete mode 100644 data/pets_parquet/_metadata delete mode 100644 data/pets_parquet/part.0.parquet delete mode 100644 data/pets_parquet/part.1.parquet delete mode 100644 data/pets_parquet/part.2.parquet delete mode 100644 data/pets_parquet/part.3.parquet diff --git a/data/pets/pets1.csv b/data/pets/pets1.csv new file mode 100644 index 00000000..19e6c4b0 --- /dev/null +++ b/data/pets/pets1.csv @@ -0,0 +1,4 @@ +nickname,age +fofo,3 +tio,1 +lulu,9 diff --git a/data/pets/pets2.csv b/data/pets/pets2.csv new file mode 100644 index 00000000..2e0c0a2e --- /dev/null +++ b/data/pets/pets2.csv @@ -0,0 +1,3 @@ +nickname,age +ooo,3 +ppp,9 diff --git a/data/pets/pets3.csv b/data/pets/pets3.csv new file mode 100644 index 00000000..96df3d77 --- /dev/null +++ b/data/pets/pets3.csv @@ -0,0 +1,3 @@ +nickname,age +aaa,2 +bbb,4 diff --git a/data/pets/pets4.csv b/data/pets/pets4.csv new file mode 100644 index 00000000..78ee47cd --- /dev/null +++ b/data/pets/pets4.csv @@ -0,0 +1,3 @@ +nickname,age +fff,7 +lll,12 diff --git a/data/pets_parquet/_common_metadata b/data/pets_parquet/_common_metadata deleted file mode 100644 index f9851f238ca5cf0dfc380527fdced9b9b1696729..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1290 zcmZ`(+iu!G5Vg`M>O)?sh|~vUDQ|>g0;!1Fhc&^*G>`-mjH#-~mklhj7uOdIiTWS? 
z!Tv_onKjL|UCEc3Gc#vr_SolSpcJCNf!tVZ7@hxZWEw=-@FP*-Z zB&o0s&NudyO7En--Mw;gC$za|$HiY;_cG%iU0u5iKMi;+AD6yvpp+6|+=cl|FUC%jJ9p-4(0YvEDM;b{=AAAp53r*4A&fF`zQCpK{K)c1ds zOOKq;%4@of^92**$x;0^&x89Vjp#n2cCv_o`Cj&!6WP%kI}O)%6fH6=wxm(a7971w zrCL#B*q*#$eugQCNyS|<7@V2M`7srL?tH*iowf)e7jWVFLP(vcQ=$O4 zLudN|?`8m_4NFuJxIsup0DvbJQLBhyAknYjp?)+1Kb`2K`nc9#I-ODC@E$v3sxuyD zQzO{;{ATC`1E1=nJ7d0>no2b`THU!*TPcp&$bE+~a{?`y42>heQrqlD8yjN-+Z@Fc zt(jV;;$saXXoOZR$!!K)T%7^O%vG9II#%--KEgW7iGIaSTZB1$l(-%L#_RamhG%I7 zp>G{t22&G^~_a#1AWkS_X)4YA@@IfdcZQvY}{Jo{!^~d;TN@jOXNar zC8MJ!dxbfACsx(KUINAUbPe(cGwc=i9XlPx!yc@|t`GjD%V16&n3?V~H_+BJpIb&v zZS4llLs5rMe3$n_r&fo42Us!hh-JKJogVy^Rc3UMogSH&sg&DAwH zW9<%JQDfv1Z$cy|J(M08Dq%u2SBMCG;ET{rhDn`VKfSMWQ+OOil~t?CVXa>-GwX$s%qsgz{Fn#8!x-+ zz3M5)oO;Z;e@2fv^psOC{U3Vhp>M_x>@Flq2E+KhH}ChG-^?51Gu-2o(bNwW#Ko7& zOf#hXQ3v?ak!Xme?rn0^Y7S>B!>Mn0$1*@XztBjNwK|>K6v*@dKYDx-0KMY?vAA5 z7fX_58Ma%J_HcyDw8i`kNlYk&OOSmgjqK8{cMly%*(a&=#gYue;9`@>WDiFuX^Z(8 zk`4kLPzcMsi;q_`tls{Q(cgUeSg|EL!VJ~WV2^z&B1khZcjzNux!^lbGdlM%VH()m6p0}Hr#Q)eFQO2 z61$7gCSy%W94yC;sy7mOgJ87HiZd#@fvp3VCCq?hrtsB*HWY#})_~xw#?1@;xQ2Dv z8n{Z!n5Zqo4|!H2(=kS+^B^t`3L~*MQ=4_7!BfSn&`9Sh6TAFnbRibkHpHW z(Xe})%aN_~Vz)+f0h{-=!L7N%I;zh{g*MS*@`l=MKz?sVw8FfXa*J1qhEb_wfPbav zuud&lv!*i}>W38Cg62-B`hGKOcxmX50SU>5P{o{FECz@`pS8+V-TuWlr zqw6}adob>*Z58M)lOLtofIV(p!!4;X{F408B6)s7N)HSH>$GStNQBl0xuJFAlcZKq zKh!4#MS22;FdG*yNYO#wl+-JXqjh(_1*tchBrO09>XX9IA=IXmYHsU+hVj#7r6=VIQCKox4AqOWGHCjxX`0Gd{(0Ul76(LZ;r- zI{+D29;$X|hdkkr`Qsuw!I{dQl9vYn3mp1_PZG{lB6;v7Dg2tq@<*8(M^YK#(6#jU zuIiXPNvG1kB;Z%*7Y_HrGeyYC@ks(zP!JSlzWmS=_JT(u7Uc7-+;2)ad_aW52j4V# z`HU43>~#DSm*u%_#|P>xZuUbW{UK5KD_Qt0nf(`upP40@`KCsM)JmV5j>>a{KYEdT z+RO`bmW6hB4vbhF7M00nivP(P3;*wA=|7X{6f_F*n2=0SNcpO3s4L-*Exw%*jL8)G}@3<~bxTcoS@V78F91XP6k{ zhK|2^1UzkF>H*pvGf)jRP@#q!+V+pd+*3_h|EJ})#=T0G8=O`@t@CK!FT=D_ z`T5LO6S(+mUlR@VcuuxTc%dL*Sj;ah-6S*Fa7172V9jE4xD?zMVP|KC(P-Cu( z^|CpVVjp*idDfHGwRP5@mgWuvy=_m8wjFKjtjCUTk58^*Tp5Dpjl>lW;(4LfVumhQ+KGu~TdD)L5m>L)CZJF#`Q=WCTJ%tNj7 
zK{YyCG%cKIXJ3`csC%a>^^om#(G0=ww&RxBQ^t$llHnQ4cW{dwqwny-Nb>d^sRxQg zEk>GaL>NEthVdrCpmxk3W|Lx)o`WLICzWd?9mLI$ui(dcclDH{DYq~zP8#!*rC}~i zXHB)9_`qQL<_!_xEn5B%n!zjG_jB73j5?YbKCPx~SA%VPD_#opZi(jz8dn_C=oK3w+@eHtv9s3?an# zecXZ|;0b@gA5;+IvwXI2LSAOTEM&k1JkoG+sZ=Ii{xwzL_w#j*Wb?wFXB+Q5&9!)v z<8r^G;8Xsty!SXbpCnBapx}5+1Y40W?$!ZD@+ID2J*`C8`-BL4SS+p_dHIYLfNhB7Ir`%DYz{I_)?WS7Rway6bK{Qisdb{;RtX#uL? zIWQtQj3-~nSN@CPmH*$ya=)i@9MlQ>SA^s%LN?GmQ(FpurSD6&XIi({hVMkKw=BIb z{_ta^SW1r946~kC+3~)F5&`$pC%Kd$?8den^ZGd?A-oRP0Sk&C$~P^H@gg@^KLEZy zw~P?&t`%yg7HYuZMUL}xrMTsU>3>eQF+O3kz3}AtbDjtDQySL&!qP&20nGO$$JQ6x zVvU^!Yi+UcV@$TP7E9ar&?+8Q55*FcSK2T?!bHS~uuuE_bM%9KuNbINRz^azj9*m}UbF~E3+#j-HGLupI_fE|-aRm9MdDp#;c-*j9j znkd8LQLVSq+rv=z&g^qr(no`M+Hp5FrxECG-?5b8U1#o3Rk6D4G`n-Xb}Q;?J+U1v zZQYf_$)IxrSfr`F#paB@uBHx`6S)zMRnfs59k(8gYhiM>fywt$V}JXgfD%+le%p!eOT-64zKJ% z>_61>fW>3A{(d~_?Q#XWcUkK-SuErhV{5omE6mY+JFa%H7guylMTYqP410yTmwH|(hAXeFWNxvg%4@@I9I-s!#4)^Hpx2I}A8MZsj^opL%f_B| z=2e@Fx_6q|h}c}`tqA;XJ6@H&WxN@R4DVRJgGbaDeTO$DlDB6lJvd0zW}>-5gy{nx zm~LVWYA5<(HYpM5862e9q;`d(gSaW`75td)F1JZqvVvhr(nOyu4Rc{X*kcxVsSxg`+s;#uo6dTxtB+y`>T1EaBFtv>fHV%n; z-(!#c8vPbM?qNT`UiN$J%rgl|nmw%4TG-ETelzo%8UJifX;0)iKF8;dVBz)%NfAOc zO~Xakb)N8h{9Xw$KFz0dhvaz*%t8uWz#|DKmq?_N#gB;`zniUbB%Kv@JllBZX|BbS z443&M0l(s}#hr)2**IyE00qZmB3SZ#{~w*ttc7om<}wdH#eI5^U@I zJC)A-y!FmaFj`;dh@UxAS;Lj&e{9 z&w&xiVLaJfw)8EASNy(>W&TWNIH(iW9|_5pgtV`DrnVIRN!}D}&$O;CjJXrK-m>sA z|MvG%z7TIO7-l`Pvh7U)B?9h+Te%bJ9d%L>F2iV zgkxn;9ag$4y)_7Q@7O-EC4JbBCT(|PbLzhC_8dzYT(@WQi7J+t?M7#&SFS`|t;M#3 zrLDViFz&Yx0Sh&?yV&f}*VWX)axB-wkt#ZvqwUuGQ6-4?HgI{;6gZ}aST9>cDfaOO zm}fOs&h4WHwRLY07%k`0Y&p?}XEkztXLN8DS8cD%o-$qxMTTcAU&AeGjK0AO6Upmilpbs(YBSNCBf|87 zH%vDX2DM`SFq;&M^cXhMbX++{(Lvk<^$LDWcW2uqOVd)Z x)hi;vTeNr|TEPp$_w$=qDKGBV_QeKU&0)$O$%Q3H>Lt?_KVB@o(5TX{Qg)v^}`kQ-T zq0cQNK)Y)NnyCdEaCo8P{9MZKIAQu<(rt}Tm~1aNu71gLZ+=R{x?fsauvh}~ZNahi zrMBE)r~XD;E*B9dTU*POz3|W~9+nTq0+d(SGC#sZ#E7s@d%ZLDb*wDmQ~Fyx%F_m= z^a4&izNfTGr74vuozljX*(B)^9-*;$k9Q(~aSsQ}!tgew5d{ENOd^#LLr1Dy!Xkaw 
zcAapn464IQcdfSuf$p8!XSSpd`_Z)RZf#E8*WI3DDTAB#d@)tU@~YkF%=OB(sH?Tu zcC@l}R}RMg_Ay|grgoRxJ^H$uI#`Y6dN@)=2XnODnm?)p@!ke5PnrS8%n<8kYbeD& z-T?Eg$I7LB(xA5P4FaR(Ow5)OZF$xs*LOxo7cs65$|JQmGnFRx5Sr@sY65*&o6ZAX z*@M`BsOcVyMr!TjXxM$orQ2Y!kXwwb!Gl_1j^_JOxsAQJqGKvD#P?>{E7ZNx zTcU|QjE*`E*w=;)b85lN6vsAPd1J+Mi!4>%7;gQDTzi7==Y>xbE-Sfr=0k!It{C5jH>rl?o&W4gQ8C27hEhQ&!^eX=ynh3#2U%1#dq yrf=U80p6nJPoWjOGJHS3eU', 10)])" + "df1 = dd.read_parquet('data/pets_parquet/*', filters=[('age', '>', 10)])" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -299,28 +308,25 @@ " \n", " \n", " nickname\n", - " \n", - " \n", " age\n", - " \n", " \n", " \n", " \n", " \n", - " 7\n", + " 0\n", " fff\n", + " 7\n", " \n", " \n", "\n", "" ], "text/plain": [ - " nickname\n", - "age \n", - "7 fff" + " nickname age\n", + "0 fff 7" ] }, - "execution_count": 11, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -331,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -340,7 +346,7 @@ "1" ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }