From d3871fc1bfb9e52646f52b4e6267e3784b95a968 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 13:27:46 -0800 Subject: [PATCH 1/7] Add read_table_changes example to showcase CDF --- kernel/examples/read_table_changes/Cargo.toml | 18 ++++++ .../examples/read_table_changes/src/main.rs | 58 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 kernel/examples/read_table_changes/Cargo.toml create mode 100644 kernel/examples/read_table_changes/src/main.rs diff --git a/kernel/examples/read_table_changes/Cargo.toml b/kernel/examples/read_table_changes/Cargo.toml new file mode 100644 index 000000000..934becae3 --- /dev/null +++ b/kernel/examples/read_table_changes/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "read_table_changes" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +clap = { version = "4.5", features = ["derive"] } +delta_kernel = { path = "../../../kernel", features = [ + "cloud", + "default-engine", +] } +env_logger = "0.11.3" +url = "2" +itertools = "0.13" +arrow = { workspace = true, features = ["prettyprint"] } diff --git a/kernel/examples/read_table_changes/src/main.rs b/kernel/examples/read_table_changes/src/main.rs new file mode 100644 index 000000000..1fd4d071e --- /dev/null +++ b/kernel/examples/read_table_changes/src/main.rs @@ -0,0 +1,58 @@ +use std::{collections::HashMap, sync::Arc}; + +use arrow::{compute::filter_record_batch, util::pretty::print_batches}; +use arrow_array::RecordBatch; +use clap::Parser; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; +use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::{DeltaResult, Table}; +use itertools::Itertools; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +#[command(propagate_version = true)] +struct Cli { + /// Path to the table to inspect + path: String, + /// The start version of the table changes + #[arg(short, long, default_value_t = 0)] + start_version: u64, + /// The end version of the table changes + #[arg(short, long)] + end_version: Option, +} + +fn main() -> DeltaResult<()> { + let cli = Cli::parse(); + let table = Table::try_from_uri(cli.path)?; + let options = HashMap::from([("skip_signature", "true".to_string())]); + let engine = Arc::new(DefaultEngine::try_new( + table.location(), + options, + Arc::new(TokioBackgroundExecutor::new()), + )?); + let table_changes = table.table_changes(engine.as_ref(), cli.start_version, cli.end_version)?; + + let x = table_changes.into_scan_builder().build()?; + let batches: Vec = x + .execute(engine.clone())? + .map(|scan_result| -> DeltaResult<_> { + let scan_result = scan_result?; + let mask = scan_result.full_mask(); + let data = scan_result.raw_data?; + let record_batch: RecordBatch = data + .into_any() + .downcast::() + .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))? + .into(); + if let Some(mask) = mask { + Ok(filter_record_batch(&record_batch, &mask.into())?) + } else { + Ok(record_batch) + } + }) + .try_collect()?; + print_batches(&batches)?; + Ok(()) +} From 7fed21a81c19109bcc043e1b7b916aaba351c9d6 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 13:42:53 -0800 Subject: [PATCH 2/7] Add test that has cdc and deletion vector operations --- kernel/tests/cdf.rs | 57 ++++++++++++++++++ .../data/cdf-table-with-cdc-and-dvs.tar.zst | Bin 0 -> 19759 bytes 2 files changed, 57 insertions(+) create mode 100644 kernel/tests/data/cdf-table-with-cdc-and-dvs.tar.zst diff --git a/kernel/tests/cdf.rs b/kernel/tests/cdf.rs index 5263df960..2be5324fc 100644 --- a/kernel/tests/cdf.rs +++ b/kernel/tests/cdf.rs @@ -172,3 +172,60 @@ fn cdf_non_partitioned() -> Result<(), Box> { assert_batches_sorted_eq!(expected, &batches); Ok(()) } + +#[test] +fn cdf_with_cdc_and_dvs() -> Result<(), Box> { + let batches = read_cdf_for_table("cdf-table-with-cdc-and-dvs", 0, None)?; + let mut expected = vec![ + "+----+--------------------+------------------+-----------------+", + "| id | comment | _change_type | _commit_version |", + "+----+--------------------+------------------+-----------------+", + "| 1 | initial | insert | 0 |", + "| 2 | insert1 | insert | 1 |", + "| 3 | insert1-delete1 | insert | 1 |", + "| 4 | insert1-delete2 | insert | 1 |", + "| 5 | insert1-delete2 | insert | 1 |", + "| 3 | insert1-delete1 | delete | 2 |", + "| 3 | insert1-delete1 | insert | 4 |", + "| 4 | insert1-delete2 | delete | 5 |", + "| 5 | insert1-delete2 | delete | 5 |", + "| 4 | insert1-delete2 | insert | 7 |", + "| 5 | insert2 | insert | 8 |", + "| 1 | initial | update_preimage | 9 |", + "| 1 | update1 | update_postimage | 9 |", + "| 2 | insert1 | update_preimage | 9 |", + "| 2 | update1 | update_postimage | 9 |", + "| 3 | insert1-delete1 | update_preimage | 9 |", + "| 3 | update1 | update_postimage | 9 |", + "| 1 | update1 | delete | 10 |", + "| 2 | update1 | update_preimage | 12 |", + "| 2 | update2 | update_postimage | 12 |", + "| 6 | insert3 | insert | 14 |", + "| 7 | insert3 | insert | 14 |", + "| 8 | insert4 | insert | 15 |", + "| 9 | insert4 | insert | 15 |", + "| 8 | insert4 | delete | 16 |", + "| 7 | insert3 | delete | 16 |", + "| 10 | merge1-insert | insert | 18 |", + "| 11 | merge1-insert | insert | 18 |", + "| 9 | merge1-update | update_postimage | 18 |", + "| 9 | insert4 | update_preimage | 18 |", + "| 11 | merge1-insert | update_preimage | 20 |", + "| 11 | | update_postimage | 20 |", + "| 12 | merge2-insert | insert | 22 |", + "| 11 | | delete | 22 |", + "| 3 | update1 | delete | 24 |", + "| 4 | insert1-delete2 | delete | 24 |", + "| 5 | insert2 | delete | 24 |", + "| 2 | update2 | delete | 24 |", + "| 6 | insert3 | delete | 24 |", + "| 9 | merge1-update | delete | 24 |", + "| 0 | new | insert | 25 |", + "| 1 | after-large-delete | insert | 25 |", + "| 2 | | insert | 25 |", + "+----+--------------------+------------------+-----------------+", + ]; + sort_lines!(expected); + assert_batches_sorted_eq!(expected, &batches); + Ok(()) +} diff --git a/kernel/tests/data/cdf-table-with-cdc-and-dvs.tar.zst b/kernel/tests/data/cdf-table-with-cdc-and-dvs.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..6304ccb06295e2bd6895cced996eb970cd3ac6ee GIT binary patch literal 19759 zcmV)0K+eA?wJ-euSZoaes*&MPR3KQ^rVV}FZue=r6!^PhI-*#4#9J7V5Qn=GQ~Sg; zkcw<+X)1r)h1RdqENDw2tAD?~m!8YzwB1%Hc6WDow`#8;E#LQZ{2#uIGP2LskKc^! z6cq(z1+N6xqY+JD0#7L8i5@7{D(9qB+AC?anX`6oubdduyWuu*1~)k?lv`FvwU{rh znGGnemR2ouDcUW^%=UQgm6S)FrB-pxw!7VJt#(cEYAoZ#@?M$gm09PVRSOF=Fc1lZ zU20%M+!G|12IkhlK-l#}kH$T5y)*#Rf`MS(g05>$oETor4*CBvR0#S1*`eGqGka@u zot>DE8gOvIgNJ&p_wQf+|MUNz9iIQcpkUVv=+Vfqu>~zI@Z!UnaY}3@m6%$Jsogwo zoMv-s7HZlz)UsIN=6FKNH_kRo8bcRA@G$>B`j0~X-*J5g5N`h8T}vaiF;Y4!wvk4g zc~hsXRP7dZ?-^UWp`0>nPE>S;A#8vs;_*R?jJ6QLRd-Bv?4i{4CiL zeVIwAPOMeGQYv*S+Sb{nDJ#{?3=%*7iUI?XKA7tQR)yrcAh|6t=Yb!fj}1RWkERM~ z2#Azn3cx;32LoK8h!nXIg~4olR-kRR83j{|xBe7+;yQpPH+lo8YFt~ldC@N~{eqYbSVS2}5}H^$iNJ4vjzPDu$Jot$4056NRl2-NjOnad7}|Q z^nh}rrOk5Mk|-%c1f}g9N~I26byyjq$&u3%mQ|t(tB4XIiV(q8hY?GZ^l;>fP1QPt zsKSH?DMJVmjX=T#Ns?NP?tE%L)`Ast{0?AdzB42rN+vCOag7!h{YngcHir zva2NFA%oI_(gc;Hij)%(T9`~$6(DL6BtQ@;ty-;}rL+SPN_sfrq=*qEOKh@~Ixx)> zUz~BtUY&8Q6iXsK=hVuvi8PDNF%t<+xaog{Mje*(Dw?^nPDO^(m=MMKg(JE*nJex?q<4 zxZeAK`TuJPJGg=zZ}dd(-~Yo9B>?|F181MI>NzmLd>}9dFplu%D~VhUIz%EE9b7=c zfhOL}^D!-^F-|!vrWNBjrPZ?3I-e5uylJTWtZ?2a;g-2(o9%ZCxQrlqg5=|N_6MO6ix~kg>!&o!ZE;UaY{HHmvTAIimT$bGdTq~B-|0s$rAU2+rj1Ftb?uw_uR{lyWZvFx-Koa zIb0Up6x9jxF(K?TjCJluCuPX>2Po}xBxgF4&-7SjPp1zyc|D;cfuRt zgYY)^8vGdl#h>6q@Kih$zw-qxe+xrH%$Z}!w*&K#CKTjnwld&ta=JdlfK$iSW;?gj_`nk z5$~z4)1<}POL}W_1=Q3WF~W*cBYFq;VN^toNQe&c5vpe!HAIcb)(~sNQzt7>M4}jE zkGhr<3yP@500rQ^wfpMG5j8b8q!7pjBi>KjiK~F#HydRQ!CZ^Ns37H ze8kX$#|RB4OngXEgh-|(9BieSL{f~B9X^ca~~UCZeS3RR@HHfl#hZH>ps#7gDt zpj28BSo-NoUj>m;X-crDsIYd7OswA8NcxpiYoof8&e|D`Oj4vEafB2jqSOFDB$Ck) zGC~oaBUf0cDPZ_5s#Ffjipfb1L1=iO5ED#XT5WLR)JxdlFhRAn!LgYxvBb>C6as}n zN3eKmM`Z+yilXgK09jVUwi3$kV{M~SN~VX0~TO3XKHvy9%Dt=_8EbL|yJo~JhDY_rW;U#auOS67*Z^8yAUU6_*t z8-z5@1|46pA_7Es1$d;7==1pm)Y@2zoLd_+PUp4sR;?-T zm2h_04xyTyyP098xY|0Ul2j42qNzh6)jc5d@Fu<0+Z31s&jfCAL;d zSu5=t$GdZRA&*-IH7hHW5@(oswE0SYG0peddP{h6di$pDtypJN)3m*`)=PD+vd1{J zN#B@f)DdTjXT?_LyagvnR}-YG2`4TsRP2a?7*z;?79ldY6oHFJ#0@YnJFtrkEY3*d z;XIsJvAoh=S}V1bYE??QbKMbYSY~k7SmD;@g?5`%=DIH&XgM*S`qC1v*`{~KX{S8L zuJye;t}$;e^+xp;qv_^Yt*la7ne()FYAifxU?9>6`$T~`E~HBh(gg-~xq*#7Tx7Dq z92VGUbX{qb5rxO&MmNZe9!-c#6x!(Gg9nfvAt)&h7OHWr4S+Y~bH)}qpm;8d=V|~m zq=$1O(j_x`G&-bUMFhxrOx_ZDvAp=sdTG5f-i&T-`=t8L4Dz^bsBNw=S}j7{EALtC zaY!HxH{RVy@5FI$J!8z=Y6@?s8TO4U#96IyaxOET?+Yfj*v5%xDOw>o+DWx$C!SO8 zzPolS#+qtfC+{(uZq0e>nq!PPOHHS}JLrO~u#|Jx|Ib7&z>$F`!^52DV+Ag75rQXr zpgX6n(#jg6#kJo#_3oGBuEDKo8g7zT$a|JSO*3jYij&ZMv5qsw5}kd^v`kxicg@+% z`{rD6rnSyE*Su|}C^4N<$}20C^xj#m8yJWLgI#N2LoOA-sDMP%0Fo1f22-~lpt*z58BiKeK+j59njNP0LA%%cf0;f*f1 zyc=E(R!SMCtaQ>yxwe^{RjZnuP-}I=O=E?URt+`YnfF+RmKv9G+!1%aQOsqRnccFL zTfH^Xsqxmdotdt@Gtzl0wG>lYwY+h_fzh}pNNx)xCkN){_&CsYaq(O`Vm=S_XkxfP zfejYCndjsEi`dH!A2K1-KDACbz`P+aVvt}4VZ0^J$@@I#L}PrR21K~>T3YFpclS9n zt2?b5oY-a%Yq3JQXNQ^8Rm+(E;0Ie;e+eP|zl%3wYwe6P;(50_vz?pOt_EqP8ERXr z&_IcH{Q0E#m=pSnQ{&D#1-zSQHbT$nr6GCat4XDPCVy~HkHO`r>Ib--9mM4GL?e- zK!Ow({vYN)FFDZg{|-l4Bb|~;8f`4bnYP)ywVB}!vr99aTdZ*Jz9F^L_MEd!BfeA5 zS(@2y)MlJeL3wWdGX9t%babTR9k8>r7$s+QqLM=Sn-6I|A&Ez z@wfrU5qN+ly;NQ~X|$0Q>&`W6>FyH4t?dlwloeWxHJI(2wV4GeBzWfV%z<5BkenHq zO9T6SLDGWctiW6qT97W%5R$tBb5vlT&xa-kAfp`|{(p<5(lL!@`jnk>>MI@74w(*c zp~es%lPBf9^q^?|ABql0!Gt&DWsi$O5k%0&!(NXbLqJ7H;|ec)B&PSud9STg)+u?% zmF}uF33u-r>KrSKQ)9UKjhnt2lp5Der65#SopCN{DI{kxqqAyL%X@X!S-iAPt=Wum zrc`eewpYfQHLi0;E2Esc2W7M{otKx{T6NZ(F)*ND4m5P|WO&%i%gbjDDkN})Cwd@z z<+N2=8gZR5?@4>#TFx@K;htg4vqCy!2Ct=ACaGQBJOPZ4sWsMFZ>(HrdgZ;nYe?_R zpp3ghn|X&(b{sA3*B6%7Id#`+28~aA_#@r-x)t1sr zW3E(6Omoh{j5aVWBzFXoQv-8dz(5}@*9+)h^!a}f0Jyk;7NYP(^FIb3W@LWskT_C@ z4B9wC7oL!p4HT64x1fDs#kgf?}*BUPx(eQYs5Dy?dLT;2rrUw=PgNo-o@Z1~kMgJcXeOizMA{gN@ zc}r%s*w$-j#h70@x7IW{DWqDI`yVm zNi(~da<=-)nJ3P(N^SGCmlLkb<+_lFz?{g)T-W8<;zbkQcr(w3jEoG71jcxR0<~g^ zJFgjMG19m%+≧d9ILBDIw)u@69ugcuq?5-m*<6&tlhb+uDW^;|im-6ynninDNWxG7*mRwTCt zb5CF}tG}vX8wLaO|Nni;YNOz)x3>NNJ+SoD|9hI4;s1XIOHX;MK!Fl8@sd0vIHJEm zVXUA99e6Zv2G1&Sy_MoCv9&r+oaJnzhH~N!wH+(WHi}`^ayD(f9w%AKduglKDs|tzhE(xaKHhCE#3{U25IAs zGuBw`yfkatq}B3F>l=(1YnV5$kaoSnEN8uWpKs^@gf0J4ag_GfNG-Ki%XHLm-CJ(3 zYjeYFd4*c96XK0mDRqz4PFZ8EvhvRRjO*!{JaU2!tRA0wEX# zF$e@P41ypC22ng|m^FQCDgbp|3G^{wXLmQU+|h|_vLzQZ>d5}YLGPz@au<9j1F8&{ zU{#PN5w+K&lmjD9$|m`mrfB?a06lW9ugSW;_5)XT?wh7P6S4#Nyl*O^3dn?ibZ`!Q zvexJ0Y?&EU?n0_%|DWjW-9P@9%WlIlSN?D^TOJSky&>Q=Y`_M8q$A%O@H-lzX3fuT z(r+g%O>aQHyO9IXp$jxspSc=9?#B&jKt|sikmLt&IvlWTOE;TQL_e;z=8@hS=o5ZL zm+3)2;w%8lb)O2ne@t4EIeY>*B<)?Dw)GFrHro>DX2sfo<{bdvZrY~+)TK;^t6cRB z*y0Dk>-NB(g!l9T>A|eWuoXyI_*Ys8WhrX_bS-29a>;4{g!2d3NY=E|VfD9uRf&jNH!T&@A0XJg8a`?mNL0J@avxz6=^ z1DyB(c-=PeE5!hP&)BO-#fixI0*?%ME3;hhOMd&lp9fT&F&480_70a1$m8%cps~;g zBy{|HXl{pxgE#6iU`_O7pF_z_(QOPQCMS3^FY(CUP21I)Jp(URXSW3d-xv=Q;Q4%- zShx-I{p(M>nQVd>z~(F)fJYht9zViIf#fP4kWvTu*Rg?Lzz8Zd;^u*JdWc%Qp^#BRQcx>$e zcx-^u7x*Zcp1AgRF@awk-a^TsOkw~9`2jUV59i&A;vJEs^Rz;=$$pSGFk&L^>TcS5ldqFaC7tf)4n1Jx1XGVn09?L2DE+G!6}b-)w;| z1K$BRrvWeCqDTTT;G18H1Bz<^2nN`H&GYkd3nb83AC-b-v0%r7p#b~f;xwB7+6BFUu)asX0pfDMb_R(tk2raB=OF?_%Tslu)> z`}IosXS6i+V1un^#Gw#yrb|^4te}kpzQEHr2?r9^JXA9K7e51>5W-CuAh+%4Y+%6k}Hgz|#T7@^D7MSBamo3kv+E zg!an@Y-E5VY$>VofCCTvmMIcD%JS$d*v*&gEsuBxMmLjDP+aa~?lUm(DE^`cU#_o^ z6ky!q1r|~9lMst}ESz}mU7O*)&&4QcSK_pzLkoOKf~&Ft<>CxrCNj)6fO@HlgzFKW z`%IiQ`9>T~An{;d#8G6^xTYG-gWdKaO4yB-tTQ74?+m8vS-QLXq2`iC*v!@#PwK3G z3KrSPNrbCAdKxglB62BpK#|lQ<81&78R0bw!gxSQ zr4F4d&F;uzQW@3RI~q(R*+_t2FiAgShph zf9;VI+-?_UZpP)*xu4Xt-~RhK@FuETR=IUA%K=9XvH?xv0qWJNHwr3v0K8Ix=E_#} z1_w zg0Z!oX^$%EV%l;fXiQ@ia=s>;7YJT2S=@_1Fm;1 z8z4Le5Z-fq69B|mJb?WFx&xT!Bt-DzfnLCV)L4iPm~s+JT)se1oB_H9aVQnorXRN6 z;~|M?-2e;=vH{;e0KTsvpZo%y;{gR3;InUKfvYg*l`Gqs8PN0|FccY9h>#0nahyUh zwt_!a9kiUETtOM0X5?|5AOC-zbh|J67<78pT+U6iKw0;c3F#2UEg3G}q8WAOGR`&t zAYl)0ISTYF=F)W9z{k>?I5uT+`ZT8j3bv39&?y6SH9)EP1Ezz^%>S?W9gyAun7>KF z1fN%k9LHuk$ zk?{vKYoO3orMDc1lD69F_;$aA7_c;zY(NniV11$@Mt-9Ptimu17<**{7{>v4bXFE< zF_^PvHyUKlk}R)~Iff)O;MN#_RLvCeEf;2tzXSNefIsL5K)zA%U;y^VFn~Z6k;v63 z><#eo0kZxs@MVUVrj%ss008H<70mF^4@C>5k$SF)9Phfq;KObuWZ*hitOk^mY~s;! zGe+q?jLEb)2*!-T^xTP?IWi#I3`&QIdUhI=3$dPnT$8;&p3x_6U-+`!hEQAfbP^)D z2W11ij910c8?uiq#Dk{HsKZGk@tZ{-=3ISd$}b%3btdp0Y& z24Gu$bLO!%941P`{Lh4bzGeV0^k2qe7js$J0Zh0l2tqyMShw<=gp6#9{L4X~?(c-L zJYQ+h_sR?QysV*WacXj@jm9sU9@V}!^J)!qZGje{C*MKzDUl9d_mZuMfys+-Z9epN zC##^}F)kJ3=foG&K5clt?XV5R`Sba4K%`Lk+_8ax@x%J-bOx|8F45n6yXYe>3FAn> zGDc1ht90L|eKH^{^@=u!JGOOtrY*=Z*3mJ)e0}_T$ebr5DdqB1MflBwqtk*jPM^fu zB93Z7(^JWBLm&RjNF7GKnMI^%{Pgre=`T|3f0&jDqJH8xbCGDAMWN8&F9H=sQN23mh7tKK=k8 zH9ix{g`7%sz$p}5Sgynw9KeIb!mqZqZhyz^V}IBH;uT~AW;{TcL5q|8j2b|A3@xM> z+8eOI0|@KlED#wp-IHZ!$^eR>Di$!N@H9TL35W%RQfw;D-8%t-1UD1fUuWWfGPrxc z7uZD}(EEY%Ybx}*ltGvq-_Qi|I)EUg+JKQiNBgkwt32ir9x~TYv+CRI5qDo@B74JZdqX1_drDQ)J&YZ~>`I z7ZS;E(^XAT)ZUOI9PavHqwU$_Gpat{n9jQ?Op5B1Jg<%<*$p??2MQ(|E3P0JtxNUh zE(rg&*onr3pI--n&rx8Ls#whF9~5MkM$6|`fF@Vwm^Z+nFd6_GfD-&`4G_Aad3C@X zE8TSB<3X@}vF&vtSDB@5lEeEZMF+TKX|g*yumQ&o2FhY4I2aHsV*}*Z9YgTL3Hn%r z*Hq?$$&-~X;3*PcwQgxQfHh{7z8Ym})M2&4UMKSi%%Y$6F2gXF?FHr|HXa}z>s!Fr zzCwFNS88(z4g@%&#ldjpuj4FZxc6bGnm~%Z>D$n~y^*-(OCS370G1Qq(1K;y5~Y>( z{83hiMa&Dx|``FPhd{uKje0hHvzvxt7XH8J`GA(w))~p$o zR%cOj`j92rLxVZvk)er0mS!fkY&vUHlcf5DL{n)bX%<^IGBGhRR25?b(^)YwY{kTM zR!oe|>KIv~#n$Sq(WELSq7luq>8z3jjYOZt7S#1n^TBriG*5)FxqWY*josRvzSCWu zMO8}%CR9m~h+0errn4qV3dzw+X_96{lc*#b5eec!NK|cSm1u^LsF7%zMW`O!S)@5r z`2f{~7K<-#XOU*f1RnEAWn{4hox6lQ{iC==ZP}zxuuV>V^i7`CS(Ho2Nom9>GD8TK6dYJkyik#$f{Kk9La+?MBEtw46fjg!z)-P~ zG6c#DAS@_9sG#hiVk5-{1v5}GIG}(ps<2@TAQCj8gBMzGcyNSpQaAxnG-Q|}2^{tT z02-))vI^8dnZO0$S;fEH-*fr{7dmihg9ze*4L^ytU64|Ja9^5boK@7m|Vbu0A+1{73SNRhz;6Bd*i zR8U}0NnuhWq(VZB7%^hdSb&BCC^5X?L=POuaZU_Y!a#}*L)7pDr^9hJX1D=00=NMQ z9hTs%a2n`-uKoYb?;Ur-ZiX$s7#rnbZ|@-0=A`WFJ`+XC6)9e*G$5l3mKj{Ipn#zw zV+9ofWOTve1q;d+DkxqIP(uJ3KxiDGKmraNfB+3Ao1%mYK%BtwfaBq$aAL?XL&gEF zqa9F#3N9qU5yBDSWq=w60FgxyOmLLLUi16)l}~;s-Tz57#}YOPqc?xNO+Ej4r#?*s z6*ZF6V%{N_`J^IE(&r;uY%M90AumUEAPI6r$S*UyxKHC+oz)_OixDI*EkBCvctO%4f=dvW z7a~JI*wIgs2yqFr*izNiSuH_aUXTbG;__0Y#fT7>Wz$)tBsZjzk|b4W#jqRInVFi< zt`iAbG3;i=te6gq9h=UIsX!}+b!WwZYC@!vERkeMQj4kTyOCM8FrBr*jf^sw8CepQ7L!rNFr7szg=$0^%$e%5n2dXL8sCHdnDO)e z2-{z~=W!o&47!H>;2RW)QQ@0MC{^Mo* zA3r{2eW z8E@1FT|aZtbyVeN>W*W-gU+BUyBD1^Zs;)bJ9F!vvTJARaQhvrGw9+!_^=uOYv%fw zo4=X5?sE@rGYDaXakUvg$IB0yx%Y2}j+r{^Lg>v5Dr{Z2%s9T|-j4nbm#KU781nd^4y*q3oDuftvj`MqWDWzOEL+RK#p)&1=<*SN1ym$}I8oH~EZktsia(f=`b z5b8e1+_Un7&!D^Q)@SPaF>d$&ZU%YPb!6tuOrKlE8yP+?gX$n`$O0q%mlJ8RwMX8) z)mfw#jeUKxmjy@WTno zgC*1hq#L3n5qOSgxV;)7$AuiJr=R&a#-3vHsD$k!C(Gs@cYV;io%1_!uC*WY?z8Le z#$MfBhBw`WS`uUgR}J~`0`ntC2vpNmbwlQ6MotyU1SLd3HA3bzsu7V&Gs#H4gj|L+ z5sg-i4NOQ>$*hyfhi1viWcN?^WsRu)j!^d@1>2m{(;mCG=L_41 zV#?p=!(=o`b3}t9NvK*f$xj{613RbPzBn!I;-;WOtaYf=$M*JM4(y3 zr6f{9qLCpZx~gOoqm^_wkT94u$#L~1n62@b{K)`AJDN}-w}t;d?7 zDoJQAcC$!xBr1{SsG<>RKx!f?A`0Csez=;5ij-<-N;N}3yQXSXnn9>gHN%5cE%v0E zk=1A#S)DaAnM@!vB2u-OTTCEnolNiA|F6!Hfmxkp0uM@xfP{uDDl}P>o{o0BU&((QZw@!zY9`W)Ar z%n)n0E5l{}x9^qWxJxyZ&G`h|)Y2P!7`D%}ANkXC{{QaXYpc3<{}+VtD#+mkwB*N2 z1uvc>BmBrw0tgQF!q)D%Zo8v%{CNDWN6K#hDx2KTw?}i?Ae``7M9g*F7-^pK?qnvZ9{)>g=kk`6Z4led2;n{1vwpZ8I3X3eM+W3x)5 zNh2E3B#|K*gj6jyGgA}ltZFooCiS903y>BgD@Jy#v;g6;<7K7gWk*X94f?6Kdyb#0 zc6Yya4SD;u>uz_*4!S4d^z3tfpO0)iIoVXBx2E80&~+(o?saE>r~EhKN{*EtLUyd& za4Q(BK<5qtRZXDLz4dl>`kcFsy*p78T#qkubj~fu*4=IRJ9-c~^6^Dx)_z@9XZ_uM z>L6Tw?cLqog&UWZV|?1rpW|Qd{r~6o9(?1*5+5ZU(u0~79V3X2R8XTn*n8cbBZTS> z7cPv?^HNg0ExLPJtMlgl;pTC0x}NSSKYi~VguD8z&!0c`hf=-YJ!^2iqJ8q9cO1qj^vuioU9DH|~6Ym6KoexE6P1 zlWy|%lbo75oqV=)?%wHp*X@*BwZEUa*W=$G{_r>bDckhP+wbC|8Hkg?7aV-TLNxwc(*@lKe)UPot=?(!3HiveCP&3rW=n182Ff&1CZ-7gc@D9 zM`cf(q&n$hl4jV?Q!7K$QTkC z5fK?l)B!^lU?3nM3;!oK`VHUk-;9IZBxhsp*_{K8Ir2H3DXAZniVQ+56IGU={#kULz95>%e{< zivXgqNTgyU zcJ2{)y@>;kPLB=XRAT`5Oyvh?aWO>^rSu0$RJ1ohPILhJVO#+FW0tZ3ofe5h$OkZK z8pUE=`T?ICHho9tk!wT~X^=}~#yrd$Ef7dO=cf@!i@y$1(uq`Im5q*pw1|U;HL~ZU z)&>1|OobA>@0`lW%r^G9EJ~4&eh$Oraqmh2yGrlC0 zKCx)U9cYfx{9!zuwg6+rW;N4bcO4`&;kN(_3^=iSfnH-PCKZN|HJ4CKG+CHWQmCS7 z0M4zyGQ|Of`_RU+Uw)J9JU|EW07ap*K)gA>z5&|IJdv#rSR_3eDqEwJ=Mf*M|FU&T zV=A(bB(fHcp`$&@ein)Z#$KU#&RJ|JNx#&JQW6Zw05lkHj@!OE%HPn3rG=B^^rIdQ`JF?KTy3y42o9CClvt%hNFwjNtd(441Jb z-D-hlL1D?fD9ubUz%a;-H-NdF4M-OKcmsm&1ZVyx&-#3UW`Q(9J>Z2Nz?KHPxyH z0VpJ^l?7iGLSt@=1y)Yv30 zT{_Q}7Z6S|st_;1dAwVE_x|QwNn&05b{FacUSthGbw3+W;vV1(2hiWn$*(ETiwr1S z3oM9wK*{Y_snUR#iU;6mqY_kf1#-R_eykj@=W7rGuRv&Ht2k~I;B^);LvJ^LJ)IHl zhbjzrVBwbl*-)4s#DG_C{O|@mqk7o@8eGr#0LHh=UkBrKe!NrbKS_b_8&HI9fVj|I zAl{r`z5&I=4$w;PCpUZm@rEH#Dk_r45ehQ6ddQ$i_FZhPvrzVT@kO|^=7=8(cxh({ z?d~4Ik{)y0DNz0aCcD!QoprgdrW;KkxTUzw_kc%NW~KpUr<4t_FgG1KV8}6rq4R)( z+5wjJxBzUWtvOEO|A3OgIwM-T1%5RkQ0PMZ%}gDd!3#-4*rdz488^Gqptu1o@dKzO z1568ez)|eUKN^&p%UC!t?4(`V8!$B)KvB0f4Sh1f1p}%ni`YZdtr#gDCD+xNiw*As z0MZ+1EcXUCsQk^1eL1>)kh*gdo zo4)Cp)n6hR0mzs|HS~9&=ulX9DK7g~kuwHc6v9;*msXN9opdEI{Rq+eu|fd+JgEi~2%3?Ya(U9i<8VN)q*RT>A8CUeD+&JANcIK;0+V0LN_r zemQ^vX>jxBq4NOGZ2+c`wm{$013b@1@&YlSkT9u+N9Hs$am+4Znd67WYgk}@y$mnM z?~rML7yJ6X>#{P5MX_% z^aiBTGvX%z0+`}Bfc;$o>KT57^wUxtKwoYEs|Ub*JO2EU+j+p}cmRUYU7+~s0rTQf z6kGqZP;v&1BHcFQZC5QF1C}j^`8b0E7%=4gv7c`PRCb~T#-biTU$n6t^#Z~01HzK( z*{vF&8_;GeGA-l`msGb~*}H31>!2t#?SIDKmaAa!5DJ>JsnzcApcIz5ln>mAfCh#{3ekHG_n_MZ0xnnFR^E zQ?W8pfCixZ4z1a*=BOVe62C^C7qa78afR<#nwRPwp=>huCm68eaO~j$7!Ps4kKM}U zx3|UsasYE}z?Jp^cpiNL-suwqm7D{*aV^Ot-@A()G{;qwP3vsSz$M&Br+L9Urxc!EMbnAa>bo%s0Eyu z7k@T}vS2KIhHGZnoC1b3m;K+AqkNU$W~k7XAL5yMnOqoe{w}!H%I2{(@`(aG{kb-` z2smpNb_WxfQt~Ywzz(#9CePh!S_4rm2duptzy{MoPll;n|AnxAq5SQNHG^OJ22imo zip=kE0Qy1MfI^FBeMSPXw!sjQ0B3IioBRPtLwW&km)<3Ss>=(omI3d$4{&{8r40ae zob<_7R=es5XV$SQiM)$>SWU%U6#%g6anV6`@?=2^!-klvN!+eXXX+&Za<-&))K3N? zK>K_}!o3rEV5}QdfwqAYLJ?IS5JtG343Rz3c;W``fMP!UBG7f-mx9pPqaAKdTjFph z6OptHa5UA@8$IJlBQlN%U@}dUV9lt)GfkVLOr{x0n!f+)gQcZhY5RFnH@FCJ+DAJ0`H&j0+D~^HVl})|&;4 z1YmuOAxZ*7-vAYP1CUC&OsAV7y-IGP;Da}nTnD3R}J z3|*<)yIF$QXXy?IWu2ne{jR_#s1G?yUe z#ZqdI(T$@!LC=RCNQc0-w_i=5yqV)~G(SntM!H7NA0UqeQYrTZ&YWewysx@?41AT3 zleb07AAA>E@XS^07V9q1CIrMd$J}K1VyLcBqdw)>a&UXA<$_gD1Ub5=D8+Z)>yqgz zQ5y2vb`3bSFU0Gu*(s zGtcOm`V-KC>|6|tAS0f!C<@PLb3BQ8JT$|>NNHScH3Nm*hI^wWNXXE+r7_TW-JO=7 za6H7mMN@EH8PU7(Vik;Q$8l}<9grIU+(lfJoS>EeI(*rH%Kmi#$JGI*?dLbLvHE>( z+kQhToV@{vFH{Eb?*cna3s^Y-H5?$C{sE{NDH~9qx77fVIXs(?-T-CURJA8Fy=Hd0 z{g%LB7Kfb}2<|YZgwJ?I8}RP|jAUBX_h3jeDkGcLq|5@cDQS%cj=(D+U_u>`KLuRh zO(=!~>qOH*0HnQfip@a1q5b4vlxx-%$HhD5XP;6UbwTM3eMp z-w4+E2T8yl^|DubTL*we@}h6RUSgjlg9IRv)*m88xo-ds@&J7yg#~JP7yyNk27qk7 z0X$~Fs68`!8GwNfKu`}J5yPnRu|-KL#bXp>#B9J00H$g@0P>64Jv#twO>s5=nn3y( z@NWHkBy@|9VsQk%TG9cikt)YGpn5tg!jJ$cbNlC{DfR}G^8;Mzh%5jqhXFv5GRHSy zVmCm#UKolDz)}Vvrn~?3iKUFOYoLAeU=hvn-`vtFn|fMJ7W%f{LVQ8y!M|F94oA+{DDMkE&Bm?{_dWku8Xx|d%FYhAf3=R z;7K?DX-GLf$Uijzn@6eafD6R|2t^kb02OKVA4 z9#ET5a4r~Udbl_4g@9Lb9$gwSE4&n7?oj*#8FF`kB|iH3t00rUN9(g2$5 z8}NP_kR?ig$uG(Pu{~gT`{tszf09RtXq3#CTB07P1l~QmVNy)=i8F+*LTO(y?kh?YiU1L z7ou>mvv7_J3IhTL$c9irJY=BYldEi*nFpa{Zdef}2%jc3&x5I{2NOGvv+w5W##Sr6 z=Biv=E&Ek-XYH6N!%TY8I^8m+dYUIi`sfp_A{u@<@hbtTcL(Jj*KDK2Y{Vq1#-vfx z$zgNU(dN+pBOdXnYRk0|E;5ZRW0|;$)AJZ62A@t2qiK6st$DBP)XYr#QmgG1QF1a;ad})gdu&q6S zhDAyO#AQSTU_j%dWa5E3oC_0T8m;mHZJfyoI!H7UA0!`*6$ljk_S^fI=g|jwQ!{r` z=9r-6r-*54qlETWQ+JPdma`C8EM~P=`mUd9GX8Z&OMwMTt@f^Jm5b3_XqS^|)aiMS zY4f0TueAnSF^_sL`!V-cS9WMyhgxR8%zEcq=c?VD2chW7Ne{$KY93*7S~o?{lVxXH zs;Lh@w(g~-UK#$0w` znd^G>SH3DS3%~rTE8g{5X1{iXa;PKQx>Q?GQmC;UGZ$bGKwuyU3W>!KIgzEk0TaLg z2BJ_H7z&6&p+Fc62m^zFKp+?j1Hz#&C?pDpfg}Z!EXd+qZ+Kbl+~XhYc&FnA1oj6I z#@>$dv*DZvOn49Awvq*yKQ6}@9-vcYPzs>wAFza-L|HkC7XfhI(^WCQhbyWKJ%&&} z~stGf|CevH%*$X0rhq2V+YpK)O%r0I=osw53pz2Fcz&zyh)D)deh5GvG7=sjUo6 z4(MY8K${%EOFvQ%Smwe&G8HJO45FHusc_=+NuL1}!~#T~OOPP|FasvmGK6ysQ0Y_MaX^$+NADA0LC1GH@55vcw>l62xG?lj2#vR<6d>@RxWupGytm_e~D+5 z0gLnW`wkHKx!HhOU$`7h9rSPjle(tr0m6HLy{4>rx?ca??Ld8iA+X>U?*ZbEiLTB_ zPRoyV14xEFKY80gK&phAJ}tag3ntYD}8NyLBpT-&N%% zT>wLIc0jNTCuA)xmT-nN(E26jNUQMY1*&$?|1TID=oCNbkxC)ldrKAn0yQe-bLy=N znw}7@)%o=ts{&-WXT3=%dVa6lrWBK|IiER7AH7MSK!}xuMvFR-;2%oUGR4k)m_=WE z^zYgrLD3oTeR3Xfp^5`SX&4B2_I9*JKZXGTpY)nTzz#^#g#pHl8!qx6AARMu1qh-Z z5MYVGnJ6$3@&RLKTb_hsnSlj(q1_mYPhmK~%F<3{m4kHcApf-j4z?)fZsKx?jrC$PAy8ZyZQ?#i{>QQi_q!b&fiLyw*M%o45 zlLUrW1iO8=JC(iY12zdU)tYwqyhuSNg~UArc!Z`*%Zmj7l8*{h-;j0%0*YR4dUNp7 zByDWIR-cIp)TAHEX|1Zrmy)B(20REO<4+vrXT$S=07U^;vw->IYc^o?oQEY*ffIKh zkd%k!a_&z~e55>Ciwx^%hp&vJAPpMUo0xc=#Ff*qdQ9=`FI7xS{#U~I^$I=R3%Pi7 zYfIDoPa67PA$PeMsQxw$NHtO*DLpsDvz*_zQNThpJ_Eq(2e-}{kQm2+)bl{m;s6VZ z1CYENpdr<%{KaP&12n~K0jAUgX21by55*KfO*nvI58&?1fOKIRcUX__N&qWrE==Af z*5*N)u)&V58NfQM)Mh~W>DhpKZh*%$NtOA{=hy=%EOP-E)B}0}4VP%eOd#k z9U{2fJC->c(BD2_x)}o|T~zu2eE&Rv#$pcGFxbrtP#=J>^8nBOWV8XS$N}C$W2k_w z=mYS`lI14g<{XB_qNFOb#mOo~k($C-6n%yiw15mqfenhMdd0_pxd4_H+TuNc+5664Z-P?* zJaGdErUCqB1Eis5cK{gQd4QSYW(#;B`%yLksSOZtxj_M>Mhu|fwWi?2`!~a5ch(=BRI+{8{AV~EuL~p`)T-2^Z3TQ`k#=Fxl!)+%kXF0?wmc9Fm`jLR z$W8SdsaE7BZoo#n0U~C?P(GTk9#Rje%`KP{_e;E^V9SSjk3JYQ73zC555EIF5(5$q zH3oR=hXb+?hye+o|j<|myvfUB-xj1Tkx zABO=NcKU%^HUO?Vk9i-!XJjZ8?=KUV@RWa5pu`LzhbZ(bU$nc!+_5sKf~-qxlG%nO z@?rs@>_vj;PDLEhJD5;Oo~z&D?Y<7nU#XsvEY?_(v4Zu&V*QD2btG3(l>9|V>`vyb zKE4%KC-QsLDbE;0%;ggdrZg=)2{)O09EqIy= zV7HO_&3tolA}Nxtu*+si;=@B|BH(j5EsLut4`CW{S%9!6gKel#gyk^UK*8jE(4h{RyV_k4KZ1-cTc5CC9&FL>N?km$7vyL)S=_)Gf zM37!mwj1P$D-fTZZ4a1(j?qoW7^1Xq`o(@)T@GP zH`;WfNdoW*#`0F51+WB(`KWccE`m(L={eQi&#g9;zTP+!(r@e{=Rp7d8s&BtL+*C1nNGm5$tf7!LXr5 z8Q9Go;n&Q20)_p|(fE-6@PGiS{@7Axv!o zeGE%qpg_Q(Zo}A-m4iq0tvI-0V0OV_c^3Q|m zMJvPr51HOgs&PDx8vtX-5LI}8Ju|CpZ!>Rfti6TBKQ%3@TM8^&xOrQMwU%plcYA8K zKSg{g>RqN{)?2OWFRMMZX0tQ9O;g<5O*Q=u|6}U~fiS4SI!amtkp)CZ2q8uZ3{rMI z5qh`ic@!XfV%`F4$X2PDg3Dlk@g1{WNz}RwiC`_}h8#FyjRuZ;Dh9)! zfv*%|#^u)qw6tjvk`^Qm0z=D|Hh7isK|(VGT0t>jjrjN6fWP)^z_#3UfTBVU019A! z1q`pi4xbn*BogHtaICHYc;Q-L|JdQO0dd+t;9v@L#s2_?{|J2 zq<=sdp0NtMc%d+gJb^K3CX)dmr`$Bl!5hgqJ_T(j;Cl0f|M!pE7Y2kAY!&o?W{?5j zjn*$PJw2dOXFylf!O{mT>DBc6UqUqzGv$-by52!jk8(_WU02S>7 z_VcT+^8i*18=$fp7D!GHF#xxvD+N|2#sGw;%HgII5IAapuv$lwv}56FU5lzDc*6qF zNIuw?_xmkiKPDwv#Kah?aXuME&^ zjRl?`moZ2N@bphu`Tz!p$b7t@EKrQpu^*0JU!cq3v7!6c{3MiCmsX%2pw%-*0c{f2 zrDI10vcTZ%gXMA_p8)~bbwe~-*SBCWnGT`9g}k%{Q`V}FP9Kq0z}!WJFv`afwmdN( OZ`1~`aInAeeGs0i>k5Ma literal 0 HcmV?d00001 From 31fb911c2a7d5ded15281706eaeaf522b9e65507 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 14:28:18 -0800 Subject: [PATCH 3/7] Add example to top level table_changes --- .../Cargo.toml | 2 +- .../src/main.rs | 0 kernel/src/lib.rs | 6 +++- kernel/src/table_changes/mod.rs | 32 +++++++++++++++++++ kernel/src/table_changes/scan.rs | 10 +++--- 5 files changed, 44 insertions(+), 6 deletions(-) rename kernel/examples/{read_table_changes => read-table-changes}/Cargo.toml (93%) rename kernel/examples/{read_table_changes => read-table-changes}/src/main.rs (100%) diff --git a/kernel/examples/read_table_changes/Cargo.toml b/kernel/examples/read-table-changes/Cargo.toml similarity index 93% rename from kernel/examples/read_table_changes/Cargo.toml rename to kernel/examples/read-table-changes/Cargo.toml index 934becae3..f9f980dc2 100644 --- a/kernel/examples/read_table_changes/Cargo.toml +++ b/kernel/examples/read-table-changes/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "read_table_changes" +name = "read-table-changes" version = "0.1.0" edition = "2021" publish = false diff --git a/kernel/examples/read_table_changes/src/main.rs b/kernel/examples/read-table-changes/src/main.rs similarity index 100% rename from kernel/examples/read_table_changes/src/main.rs rename to kernel/examples/read-table-changes/src/main.rs diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index e771379d1..4e2e3a174 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -10,12 +10,16 @@ //! //! A full `rust` example for reading table data using the default engine can be found in the //! [read-table-single-threaded] example (and for a more complex multi-threaded reader see the -//! [read-table-multi-threaded] example). +//! [read-table-multi-threaded] example). An example for reading the table changes for a table +//! using the default engine can be found in the [read-table-changes] example. +//! //! //! [read-table-single-threaded]: //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded //! [read-table-multi-threaded]: //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded +//![read-table-changes]: +//! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-changes //! //! Simple write examples can be found in the [`write.rs`] integration tests. Standalone write //! examples are coming soon! diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index 2c15bd537..3ecbc0969 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -1,4 +1,36 @@ //! Provides an API to read the table's change data feed between two versions. +//! +//! # Example +//! ```rust +//! # use std::sync::Arc; +//! # use delta_kernel::engine::sync::SyncEngine; +//! # use delta_kernel::expressions::{column_expr, Scalar}; +//! # use delta_kernel::{Expression, Table, Error}; +//! # let path = "./tests/data/table-with-cdf"; +//! # let engine = Arc::new(SyncEngine::new()); +//! // Construct a table from a path oaeuhoanut +//! let table = Table::try_from_uri(path)?; +//! +//! // Declare the version range for the table's change data feed +//! let table_changes = table.table_changes(engine.as_ref(), 0, 1)?; +//! +//! // Optionally specify a schema and predicate for the table changes scan +//! let schema = table_changes +//! .schema() +//! .project(&["id", "_commit_version"])?; +//! let predicate = Arc::new(Expression::gt(column_expr!("id"), Scalar::from(10))); +//! +//! // Construct the table changes scan +//! let table_changes_scan = table_changes +//! .into_scan_builder() +//! .with_schema(schema) +//! .with_predicate(predicate.clone()) +//! .build()?; +//! +//! // Execute the table changes scan to get a fallible iterator of `ScanResult`s +//! let batches = table_changes_scan.execute(engine.clone())?; +//! # Ok::<(), Error>(()) +//! ``` use std::collections::HashSet; use std::sync::{Arc, LazyLock}; diff --git a/kernel/src/table_changes/scan.rs b/kernel/src/table_changes/scan.rs index 0902bd2d0..9b0ba3067 100644 --- a/kernel/src/table_changes/scan.rs +++ b/kernel/src/table_changes/scan.rs @@ -1,3 +1,5 @@ +//! Functionality to create and execute table changes scans over the data in the delta table + use std::sync::Arc; use itertools::Itertools; @@ -16,8 +18,8 @@ use super::resolve_dvs::{resolve_scan_file_dv, ResolvedCdfScanFile}; use super::scan_file::scan_data_to_scan_file; use super::{TableChanges, CDF_FIELDS}; -/// The result of building a [`TableChanges`] scan over a table. This can be used to get a change -/// data feed from the table +/// The result of building a [`TableChanges`] scan over a table. This can be used to get the change +/// data feed from the table. #[derive(Debug)] pub struct TableChangesScan { // The [`TableChanges`] that specifies this scan's start and end versions @@ -37,7 +39,7 @@ pub struct TableChangesScan { /// This builder constructs a [`TableChangesScan`] that can be used to read the [`TableChanges`] /// of a table. [`TableChangesScanBuilder`] allows you to specify a schema to project the columns -/// or specify a predicate to filter rows in the Change Data Feed. Note that predicates over Change +/// or specify a predicate to filter rows in the Change Data Feed. Note that predicates containing Change /// Data Feed columns `_change_type`, `_commit_version`, and `_commit_timestamp` are not currently /// allowed. See issue [#525](https://github.com/delta-io/delta-kernel-rs/issues/525). /// @@ -45,7 +47,7 @@ pub struct TableChangesScan { /// [`ScanBuilder`]. /// /// [`ScanBuilder`]: crate::scan::ScanBuilder -/// #Examples +/// # Example /// Construct a [`TableChangesScan`] from `table_changes` with a given schema and predicate /// ```rust /// # use std::sync::Arc; From 0e49c9b22066a4e24fb3edfce349286aaaf1a7fe Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 15:40:30 -0800 Subject: [PATCH 4/7] Fix up table changes docs --- kernel/src/table_changes/mod.rs | 35 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index 3ecbc0969..48b5aeb89 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -71,21 +71,28 @@ static CDF_FIELDS: LazyLock<[StructField; 3]> = LazyLock::new(|| { /// - `_change_type`: String representing the type of change that for that commit. This may be one /// of `delete`, `insert`, `update_preimage`, or `update_postimage`. /// - `_commit_version`: Long representing the commit the change occurred in. -/// - `_commit_timestamp`: Time at which the commit occurred. If In-commit timestamps is enabled, -/// this is retrieved from the [`CommitInfo`] action. Otherwise, the timestamp is the same as the -/// commit file's modification timestamp. +/// - `_commit_timestamp`: Time at which the commit occurred. The timestamp is retrieved from the +/// file modification time of the log file. No timezone is associated with the timestamp. +/// +/// Currently, in-commit timestamps (ICT) is not supported. In the future when ICT is enabled, the +/// timestamp will be retrieved from the `inCommitTimestamp` field of the CommitInfo` action. +/// See issue [#559](https://github.com/delta-io/delta-kernel-rs/issues/559) +/// For details on In-Commit Timestamps, see the [Protocol](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#in-commit-timestamps). +/// /// /// Three properties must hold for the entire CDF range: -/// - Reading must be supported for every commit in the range. This is determined using [`ensure_read_supported`] +/// - Reading must be supported for every commit in the range. Currently the only read feature allowed +/// is deletion vectors. This will be expanded in the future to support more delta table features. +/// Because only deletion vectors are supported, reader version 2 will not be allowed. That is +// because version 2 requires that column mapping is enabled. Reader versions 1 and 3 are allowed. /// - Change Data Feed must be enabled for the entire range with the `delta.enableChangeDataFeed` -/// table property set to 'true'. +/// table property set to `true`. Performing change data feed on tables with column mapping is +/// currently disallowed. We check that column mapping is disabled, or the column mapping mode is `None`. /// - The schema for each commit must be compatible with the end schema. This means that all the /// same fields and their nullability are the same. Schema compatibility will be expanded in the /// future to allow compatible schemas that are not the exact same. /// See issue [#523](https://github.com/delta-io/delta-kernel-rs/issues/523) /// -/// [`CommitInfo`]: crate::actions::CommitInfo -/// [`ensure_read_supported`]: crate::actions::Protocol::ensure_read_supported /// # Examples /// Get `TableChanges` for versions 0 to 1 (inclusive) /// ```rust @@ -225,11 +232,8 @@ impl TableChanges { } } -/// Ensures that change data feed is enabled in `table_properties`. -/// -/// Performing change data feed on tables with column mapping is currently disallowed. -/// This will be less restrictive in the future. Because column mapping is disallowed, we also -/// check that column mapping is disabled, or the column mapping mode is `None`. +/// Ensures that change data feed is enabled in `table_properties`. See the documentation +/// of [`TableChanges`] for more details. fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult<()> { require!( table_properties.enable_change_data_feed.unwrap_or(false), @@ -246,12 +250,7 @@ fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult } /// Ensures that Change Data Feed is supported for a table with this [`Protocol`] . -// -// Currently the only read feature allowed is deletion vectors. This will be expanded in the -// future to support more delta table features. -// -// Because only deletion vectors are supported, reader version 2 will not be allowed. That is -// because version 2 requires that column mapping is enabled. Reader versions 1 and 3 are allowed. +//See the documentation of [`TableChanges`] for more details. fn ensure_cdf_read_supported(protocol: &Protocol) -> DeltaResult<()> { static CDF_SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| HashSet::from([ReaderFeatures::DeletionVectors])); From 0b13634b7025a3c84ddcf91e4337dafed6e33110 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 15:44:03 -0800 Subject: [PATCH 5/7] address nits --- kernel/examples/read-table-changes/src/main.rs | 4 ++-- kernel/src/table_changes/mod.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/examples/read-table-changes/src/main.rs b/kernel/examples/read-table-changes/src/main.rs index 1fd4d071e..3360a06cf 100644 --- a/kernel/examples/read-table-changes/src/main.rs +++ b/kernel/examples/read-table-changes/src/main.rs @@ -34,8 +34,8 @@ fn main() -> DeltaResult<()> { )?); let table_changes = table.table_changes(engine.as_ref(), cli.start_version, cli.end_version)?; - let x = table_changes.into_scan_builder().build()?; - let batches: Vec = x + let table_changes_scan = table_changes.into_scan_builder().build()?; + let batches: Vec = table_changes_scan .execute(engine.clone())? .map(|scan_result| -> DeltaResult<_> { let scan_result = scan_result?; diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index 48b5aeb89..3d5e7f656 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -11,10 +11,10 @@ //! // Construct a table from a path oaeuhoanut //! let table = Table::try_from_uri(path)?; //! -//! // Declare the version range for the table's change data feed +//! // Get the table changes (change data feed) between version 0 and 1 //! let table_changes = table.table_changes(engine.as_ref(), 0, 1)?; //! -//! // Optionally specify a schema and predicate for the table changes scan +//! // Optionally specify a schema and predicate to apply to the table changes scan //! let schema = table_changes //! .schema() //! .project(&["id", "_commit_version"])?; @@ -28,7 +28,7 @@ //! .build()?; //! //! // Execute the table changes scan to get a fallible iterator of `ScanResult`s -//! let batches = table_changes_scan.execute(engine.clone())?; +//! let table_change_batches = table_changes_scan.execute(engine.clone())?; //! # Ok::<(), Error>(()) //! ``` use std::collections::HashSet; From 27b4b9182a17119accb491ad8dea97ed1ace6170 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 15:46:06 -0800 Subject: [PATCH 6/7] fix spacing --- kernel/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 4e2e3a174..1d6902d86 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -18,7 +18,7 @@ //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded //! [read-table-multi-threaded]: //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded -//![read-table-changes]: +//! [read-table-changes]: //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-changes //! //! Simple write examples can be found in the [`write.rs`] integration tests. Standalone write From 47dcb2c8c395c14ed59609e577d2bbdcc4b9201a Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 16:48:11 -0800 Subject: [PATCH 7/7] Fix nit --- kernel/src/table_changes/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index 3d5e7f656..b74f65b7a 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -250,7 +250,7 @@ fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult } /// Ensures that Change Data Feed is supported for a table with this [`Protocol`] . -//See the documentation of [`TableChanges`] for more details. +/// See the documentation of [`TableChanges`] for more details. fn ensure_cdf_read_supported(protocol: &Protocol) -> DeltaResult<()> { static CDF_SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| HashSet::from([ReaderFeatures::DeletionVectors]));