diff --git a/404.html b/404.html
index 235f06c7a3..7fffe4d558 100644
--- a/404.html
+++ b/404.html
@@ -212,16 +212,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="/delta-io/delta-rs/." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="/delta-io/delta-rs/." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -360,19 +363,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="/delta-io/delta-rs/." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="/delta-io/delta-rs/." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="/delta-io/delta-rs/why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1086,6 +1156,8 @@
       
         
       
+        
+      
     
     
     
@@ -1146,6 +1218,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="/delta-io/delta-rs/integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="/delta-io/delta-rs/integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/catalog/index.html b/api/catalog/index.html
index 9eaa7dc644..04cd52bf8b 100644
--- a/api/catalog/index.html
+++ b/api/catalog/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1162,6 +1232,8 @@
       
         
       
+        
+      
     
     
     
@@ -1222,6 +1294,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/delta_table/delta_table_alterer/index.html b/api/delta_table/delta_table_alterer/index.html
index f715702e41..6c8405923d 100644
--- a/api/delta_table/delta_table_alterer/index.html
+++ b/api/delta_table/delta_table_alterer/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1159,6 +1229,8 @@
       
         
       
+        
+      
     
     
     
@@ -1219,6 +1291,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/delta_table/delta_table_merger/index.html b/api/delta_table/delta_table_merger/index.html
index 599bb44dd8..d68db372a2 100644
--- a/api/delta_table/delta_table_merger/index.html
+++ b/api/delta_table/delta_table_merger/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1215,6 +1285,8 @@
       
         
       
+        
+      
     
     
     
@@ -1275,6 +1347,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/delta_table/delta_table_optimizer/index.html b/api/delta_table/delta_table_optimizer/index.html
index 01bd5d2673..b46a97e26e 100644
--- a/api/delta_table/delta_table_optimizer/index.html
+++ b/api/delta_table/delta_table_optimizer/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1166,6 +1236,8 @@
       
         
       
+        
+      
     
     
     
@@ -1226,6 +1298,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/delta_table/index.html b/api/delta_table/index.html
index 08826b9ad5..c8e097f795 100644
--- a/api/delta_table/index.html
+++ b/api/delta_table/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1099,6 +1169,8 @@
       
         
       
+        
+      
     
     
     
@@ -1159,6 +1231,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/delta_table/metadata/index.html b/api/delta_table/metadata/index.html
index 81bdf430eb..b17727453b 100644
--- a/api/delta_table/metadata/index.html
+++ b/api/delta_table/metadata/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1194,6 +1264,8 @@
       
         
       
+        
+      
     
     
     
@@ -1254,6 +1326,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/delta_writer/index.html b/api/delta_writer/index.html
index befce230e8..90048f02f1 100644
--- a/api/delta_writer/index.html
+++ b/api/delta_writer/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1186,6 +1256,8 @@
       
         
       
+        
+      
     
     
     
@@ -1246,6 +1318,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/exceptions/index.html b/api/exceptions/index.html
index fd6d63fc56..918dd8ee01 100644
--- a/api/exceptions/index.html
+++ b/api/exceptions/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1167,6 +1237,8 @@
       
         
       
+        
+      
     
     
     
@@ -1227,6 +1299,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/schema/index.html b/api/schema/index.html
index 1e65d0d013..64e5a57363 100644
--- a/api/schema/index.html
+++ b/api/schema/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1505,6 +1575,8 @@
       
         
       
+        
+      
     
     
     
@@ -1565,6 +1637,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/api/storage/index.html b/api/storage/index.html
index cf5488b144..9dd86ef360 100644
--- a/api/storage/index.html
+++ b/api/storage/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1180,6 +1250,8 @@
       
         
       
+        
+      
     
     
     
@@ -1240,6 +1312,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/delta-interop.png b/delta-interop.png
new file mode 100644
index 0000000000..7eb4abfd2c
Binary files /dev/null and b/delta-interop.png differ
diff --git a/how-delta-lake-works/architecture-of-delta-table/index.html b/how-delta-lake-works/architecture-of-delta-table/index.html
index 2a2c09ae90..839385ddd1 100644
--- a/how-delta-lake-works/architecture-of-delta-table/index.html
+++ b/how-delta-lake-works/architecture-of-delta-table/index.html
@@ -221,16 +221,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -371,19 +374,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1097,6 +1167,8 @@
       
         
       
+        
+      
     
     
     
@@ -1157,6 +1229,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/index.html b/index.html
index 4b3cdb3bd3..9585914cbb 100644
--- a/index.html
+++ b/index.html
@@ -12,7 +12,7 @@
       
       
       
-        <link rel="next" href="usage/installation/">
+        <link rel="next" href="why-use-delta-lake/">
       
       
       <link rel="icon" href="delta-rust-no-whitespace.svg">
@@ -20,7 +20,7 @@
     
     
       
-        <title>Delta Lake Documentation</title>
+        <title>Home - Delta Lake Documentation</title>
       
     
     
@@ -223,16 +223,19 @@
     
   
   
-    <li class="md-tabs__item md-tabs__item--active">
-      <a href="." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item md-tabs__item--active">
+        <a href="." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,81 +376,80 @@
     
   
   
-    <li class="md-nav__item md-nav__item--active">
-      
-      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+    
+    
       
+        
+          
+        
       
         
       
+    
+    
+    
+    
       
-        <label class="md-nav__link md-nav__link--active" for="__toc">
-          
-  
-  <span class="md-ellipsis">
-    Home
-  </span>
-  
-
-          <span class="md-nav__icon md-icon"></span>
-        </label>
       
-      <a href="." class="md-nav__link md-nav__link--active">
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" checked>
         
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="." class="md-nav__link md-nav__link--active">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
-      </a>
-      
+            </a>
+            
+              
+              <label class="md-nav__link md-nav__link--active" for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
         
-
-<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-  
-  
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
   
-    
   
   
-    <label class="md-nav__title" for="__toc">
-      <span class="md-nav__icon md-icon"></span>
-      Table of contents
-    </label>
-    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-      
-        <li class="md-nav__item">
-  <a href="#important-terminology" class="md-nav__link">
-    Important terminology
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#why-implement-the-delta-lake-transaction-log-protocol-in-rust-and-scala" class="md-nav__link">
-    Why implement the Delta Lake transaction log protocol in Rust and Scala?
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#contributing" class="md-nav__link">
-    Contributing
-  </a>
+    <li class="md-nav__item">
+      <a href="why-use-delta-lake/" class="md-nav__link">
+        
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#project-history" class="md-nav__link">
-    Project history
-  </a>
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
   
-</li>
-      
-    </ul>
+
+      </a>
+    </li>
   
-</nav>
+
+              
+            
+          </ul>
+        </nav>
       
     </li>
   
@@ -1165,6 +1167,8 @@
       
         
       
+        
+      
     
     
     
@@ -1225,6 +1229,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="integrations/delta-lake-pandas/" class="md-nav__link">
         
@@ -1456,13 +1480,13 @@ <h2 id="project-history">Project history</h2>
         
         
           
-          <a href="usage/installation/" class="md-footer__link md-footer__link--next" aria-label="Next: Installation">
+          <a href="why-use-delta-lake/" class="md-footer__link md-footer__link--next" aria-label="Next: Why Use Delta Lake">
             <div class="md-footer__title">
               <span class="md-footer__direction">
                 Next
               </span>
               <div class="md-ellipsis">
-                Installation
+                Why Use Delta Lake
               </div>
             </div>
             <div class="md-footer__button md-icon">
diff --git a/integrations/delta-lake-arrow/index.html b/integrations/delta-lake-arrow/index.html
index 68d09a9e23..c34a8d37bf 100644
--- a/integrations/delta-lake-arrow/index.html
+++ b/integrations/delta-lake-arrow/index.html
@@ -14,7 +14,7 @@
         <link rel="prev" href="../../api/exceptions/">
       
       
-        <link rel="next" href="../delta-lake-pandas/">
+        <link rel="next" href="../delta-lake-datafusion/">
       
       
       <link rel="icon" href="../../delta-rust-no-whitespace.svg">
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1100,6 +1170,8 @@
         
       
         
+      
+        
       
     
     
@@ -1234,6 +1306,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../delta-lake-pandas/" class="md-nav__link">
         
@@ -1540,13 +1632,13 @@ <h2 id="conclusion">Conclusion</h2>
         
         
           
-          <a href="../delta-lake-pandas/" class="md-footer__link md-footer__link--next" aria-label="Next: pandas">
+          <a href="../delta-lake-datafusion/" class="md-footer__link md-footer__link--next" aria-label="Next: DataFusion">
             <div class="md-footer__title">
               <span class="md-footer__direction">
                 Next
               </span>
               <div class="md-ellipsis">
-                pandas
+                DataFusion
               </div>
             </div>
             <div class="md-footer__button md-icon">
diff --git a/integrations/delta-lake-datafusion/index.html b/integrations/delta-lake-datafusion/index.html
index 89ae9cb872..d631dd4a08 100644
--- a/integrations/delta-lake-datafusion/index.html
+++ b/integrations/delta-lake-datafusion/index.html
@@ -11,6 +11,10 @@
         <link rel="canonical" href="https://github.com/delta-io/delta-rs/integrations/delta-lake-datafusion/">
       
       
+        <link rel="prev" href="../delta-lake-arrow/">
+      
+      
+        <link rel="next" href="../delta-lake-pandas/">
       
       
       <link rel="icon" href="../../delta-rust-no-whitespace.svg">
@@ -18,7 +22,7 @@
     
     
       
-        <title>Using Delta Lake with DataFusion - Delta Lake Documentation</title>
+        <title>DataFusion - Delta Lake Documentation</title>
       
     
     
@@ -112,7 +116,7 @@
         <div class="md-header__topic" data-md-component="header-topic">
           <span class="md-ellipsis">
             
-              Using Delta Lake with DataFusion
+              DataFusion
             
           </span>
         </div>
@@ -219,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -275,10 +282,12 @@
         
   
   
+    
+  
   
     
     
-      <li class="md-tabs__item">
+      <li class="md-tabs__item md-tabs__item--active">
         <a href="../delta-lake-arrow/" class="md-tabs__link">
           
   
@@ -367,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
         
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1083,6 +1159,8 @@
       
   
   
+    
+  
   
     
     
@@ -1092,6 +1170,8 @@
         
       
         
+      
+        
       
     
     
@@ -1100,14 +1180,12 @@
       
       
     
-    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+    <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
       
         
         
         
-          
-        
-        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
         
           
           <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
@@ -1121,7 +1199,7 @@
             <span class="md-nav__icon md-icon"></span>
           </label>
         
-        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
           <label class="md-nav__title" for="__nav_4">
             <span class="md-nav__icon md-icon"></span>
             Integrations
@@ -1152,6 +1230,94 @@
                 
   
   
+    
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+        <label class="md-nav__link md-nav__link--active" for="__toc">
+          
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+          <span class="md-nav__icon md-icon"></span>
+        </label>
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+      
+        
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#delta-lake-performance-benefits-for-datafusion-users" class="md-nav__link">
+    Delta Lake performance benefits for DataFusion users
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#delta-lake-features-for-datafusion-users" class="md-nav__link">
+    Delta Lake features for DataFusion users
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#why-delta-lake-depends-on-datafusion" class="md-nav__link">
+    Why Delta Lake depends on DataFusion
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#conclusion" class="md-nav__link">
+    Conclusion
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
   
     <li class="md-nav__item">
       <a href="../delta-lake-pandas/" class="md-nav__link">
@@ -1410,6 +1576,44 @@ <h2 id="conclusion">Conclusion</h2>
         <footer class="md-footer">
   
     
+      
+      <nav class="md-footer__inner md-grid" aria-label="Footer" >
+        
+          
+          <a href="../delta-lake-arrow/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Arrow">
+            <div class="md-footer__button md-icon">
+              
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
+            </div>
+            <div class="md-footer__title">
+              <span class="md-footer__direction">
+                Previous
+              </span>
+              <div class="md-ellipsis">
+                Arrow
+              </div>
+            </div>
+          </a>
+        
+        
+          
+          <a href="../delta-lake-pandas/" class="md-footer__link md-footer__link--next" aria-label="Next: pandas">
+            <div class="md-footer__title">
+              <span class="md-footer__direction">
+                Next
+              </span>
+              <div class="md-ellipsis">
+                pandas
+              </div>
+            </div>
+            <div class="md-footer__button md-icon">
+              
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11H4Z"/></svg>
+            </div>
+          </a>
+        
+      </nav>
+    
   
   <div class="md-footer-meta md-typeset">
     <div class="md-footer-meta__inner md-grid">
diff --git a/integrations/delta-lake-pandas/index.html b/integrations/delta-lake-pandas/index.html
index b3137e7065..3b7b2c4125 100644
--- a/integrations/delta-lake-pandas/index.html
+++ b/integrations/delta-lake-pandas/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://github.com/delta-io/delta-rs/integrations/delta-lake-pandas/">
       
       
-        <link rel="prev" href="../delta-lake-arrow/">
+        <link rel="prev" href="../delta-lake-datafusion/">
       
       
         <link rel="next" href="../delta-lake-polars/">
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1100,6 +1170,8 @@
         
       
         
+      
+        
       
     
     
@@ -1158,6 +1230,26 @@
                 
   
   
+  
+    <li class="md-nav__item">
+      <a href="../delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
     
   
   
@@ -1700,7 +1792,7 @@ <h2 id="conclusion">Conclusion</h2>
       <nav class="md-footer__inner md-grid" aria-label="Footer" >
         
           
-          <a href="../delta-lake-arrow/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Arrow">
+          <a href="../delta-lake-datafusion/" class="md-footer__link md-footer__link--prev" aria-label="Previous: DataFusion">
             <div class="md-footer__button md-icon">
               
               <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
@@ -1710,7 +1802,7 @@ <h2 id="conclusion">Conclusion</h2>
                 Previous
               </span>
               <div class="md-ellipsis">
-                Arrow
+                DataFusion
               </div>
             </div>
           </a>
diff --git a/integrations/delta-lake-polars/index.html b/integrations/delta-lake-polars/index.html
index 24536f4899..49d58e2520 100644
--- a/integrations/delta-lake-polars/index.html
+++ b/integrations/delta-lake-polars/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1100,6 +1170,8 @@
         
       
         
+      
+        
       
     
     
@@ -1159,6 +1231,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../delta-lake-pandas/" class="md-nav__link">
         
diff --git a/search/search_index.json b/search/search_index.json
index 897f691168..81bc2efb32 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"The deltalake package","text":"<p>This is the documentation for the native Rust/Python implementation of Delta Lake. It is based on the delta-rs Rust library and requires no Spark or JVM dependencies. For the PySpark implementation, see delta-spark instead.</p> <p>This module provides the capability to read, write, and manage Delta Lake tables with Python or Rust without Spark or Java. It uses Apache Arrow under the hood, so is compatible with other Arrow-native or integrated libraries such as pandas, DuckDB, and Polars.</p>"},{"location":"#important-terminology","title":"Important terminology","text":"<ul> <li>\"Rust deltalake\" refers to the Rust API of delta-rs (no Spark dependency)</li> <li>\"Python deltalake\" refers to the Python API of delta-rs (no Spark dependency)</li> <li>\"Delta Spark\" refers to the Scala impementation of the Delta Lake transaction log protocol.  This depends on Spark and Java.</li> </ul>"},{"location":"#why-implement-the-delta-lake-transaction-log-protocol-in-rust-and-scala","title":"Why implement the Delta Lake transaction log protocol in Rust and Scala?","text":"<p>Delta Spark depends on Java and Spark, which is fine for many use cases, but not all Delta Lake users want to depend on these libraries.  delta-rs allows using Delta Lake in Rust or other native projects when using a JVM is often not an option.</p> <p>Python deltalake lets you query Delta tables without depending on Java/Scala.</p> <p>Suppose you want to query a Delta table with pandas on your local machine.  
Python deltalake makes it easy to query the table with a simple <code>pip install</code> command - no need to install Java.</p>"},{"location":"#contributing","title":"Contributing","text":"<p>The Delta Lake community welcomes contributors from all developers, regardless of your experience or programming background.</p> <p>You can write Rust code, Python code, documentation, submit bugs, or give talks to the community.  We welcome all of these contributions.</p> <p>Feel free to join our Slack and message us in the #delta-rs channel any time!</p> <p>We value kind communication and building a productive, friendly environment for maximum collaboration and fun.</p>"},{"location":"#project-history","title":"Project history","text":"<p>Check out this video by Denny Lee &amp; QP Hou to learn about the genesis of the delta-rs project:</p>"},{"location":"api/catalog/","title":"Catalog","text":"","boost":2},{"location":"api/catalog/#deltalake.data_catalog.DataCatalog","title":"deltalake.data_catalog.DataCatalog","text":"<p>             Bases: <code>Enum</code></p> <p>List of the Data Catalogs</p>","boost":2},{"location":"api/catalog/#deltalake.data_catalog.DataCatalog.AWS","title":"AWS  <code>class-attribute</code> <code>instance-attribute</code>","text":"<pre><code>AWS = 'glue'\n</code></pre> <p>Refers to the <code>AWS Glue Data Catalog &lt;https://docs.aws.amazon.com/glue/latest/dg/catalog-and-crawler.html&gt;</code>_</p>","boost":2},{"location":"api/catalog/#deltalake.data_catalog.DataCatalog.UNITY","title":"UNITY  <code>class-attribute</code> <code>instance-attribute</code>","text":"<pre><code>UNITY = 'unity'\n</code></pre> <p>Refers to the <code>Databricks Unity Catalog &lt;https://docs.databricks.com/data-governance/unity-catalog/index.html&gt;</code>_</p>","boost":2},{"location":"api/delta_writer/","title":"Writer","text":"","boost":10},{"location":"api/delta_writer/#write-to-delta-tables","title":"Write to Delta 
Tables","text":"","boost":10},{"location":"api/delta_writer/#deltalake.write_deltalake","title":"deltalake.write_deltalake","text":"<pre><code>write_deltalake(table_or_uri: Union[str, Path, DeltaTable], data: Union[pd.DataFrame, ds.Dataset, pa.Table, pa.RecordBatch, Iterable[pa.RecordBatch], RecordBatchReader], *, schema: Optional[Union[pa.Schema, DeltaSchema]] = None, partition_by: Optional[Union[List[str], str]] = None, filesystem: Optional[pa_fs.FileSystem] = None, mode: Literal['error', 'append', 'overwrite', 'ignore'] = 'error', file_options: Optional[ds.ParquetFileWriteOptions] = None, max_partitions: Optional[int] = None, max_open_files: int = 1024, max_rows_per_file: int = 10 * 1024 * 1024, min_rows_per_group: int = 64 * 1024, max_rows_per_group: int = 128 * 1024, name: Optional[str] = None, description: Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, overwrite_schema: bool = False, storage_options: Optional[Dict[str, str]] = None, partition_filters: Optional[List[Tuple[str, str, Any]]] = None, predicate: Optional[str] = None, large_dtypes: bool = False, engine: Literal['pyarrow', 'rust'] = 'pyarrow', writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; None\n</code></pre> <p>Write to a Delta Lake table</p> <p>If the table does not already exist, it will be created.</p> <p>This function only supports writer protocol version 2 currently. When attempting to write to an existing table with a higher min_writer_version, this function will throw DeltaProtocolError.</p> <p>Note that this function does NOT register this table in a data catalog.</p> <p>A locking mechanism is needed to prevent unsafe concurrent writes to a delta lake directory when writing to S3. DynamoDB is the only available locking provider at the moment in delta-rs. 
To enable DynamoDB as the locking provider, you need to set the <code>AWS_S3_LOCKING_PROVIDER</code> to 'dynamodb' as a storage_option or as an environment variable.</p> <p>Additionally, you must create a DynamoDB table with the name 'delta_rs_lock_table' so that it can be automatically discovered by delta-rs. Alternatively, you can use a table name of your choice, but you must set the <code>DELTA_DYNAMO_TABLE_NAME</code> variable to match your chosen table name. The required schema for the DynamoDB table is as follows:</p> <ul> <li>Key Schema: AttributeName=key, KeyType=HASH</li> <li>Attribute Definitions: AttributeName=key, AttributeType=S</li> </ul> <p>Please note that this locking mechanism is not compatible with any other locking mechanisms, including the one used by Spark.</p> <p>Parameters:</p> Name Type Description Default <code>table_or_uri</code> <code>Union[str, Path, DeltaTable]</code> <p>URI of a table or a DeltaTable object.</p> required <code>data</code> <code>Union[DataFrame, Dataset, Table, RecordBatch, Iterable[RecordBatch], RecordBatchReader]</code> <p>Data to write. If passing iterable, the schema must also be given.</p> required <code>schema</code> <code>Optional[Union[Schema, Schema]]</code> <p>Optional schema to write.</p> <code>None</code> <code>partition_by</code> <code>Optional[Union[List[str], str]]</code> <p>List of columns to partition the table by. Only required when creating a new table.</p> <code>None</code> <code>filesystem</code> <code>Optional[FileSystem]</code> <p>Optional filesystem to pass to PyArrow. If not provided will be inferred from uri. The file system has to be rooted in the table root. Use the pyarrow.fs.SubTreeFileSystem, to adopt the root of pyarrow file systems.</p> <code>None</code> <code>mode</code> <code>Literal['error', 'append', 'overwrite', 'ignore']</code> <p>How to handle existing data. Default is to error if table already exists. If 'append', will add new data. 
If 'overwrite', will replace table with new data. If 'ignore', will not write anything if table already exists.</p> <code>'error'</code> <code>file_options</code> <code>Optional[ParquetFileWriteOptions]</code> <p>Optional write options for Parquet (ParquetFileWriteOptions). Can be provided with defaults using ParquetFileWriteOptions().make_write_options(). Please refer to https://github.com/apache/arrow/blob/master/python/pyarrow/_dataset_parquet.pyx#L492-L533 for the list of available options. Only used in pyarrow engine.</p> <code>None</code> <code>max_partitions</code> <code>Optional[int]</code> <p>the maximum number of partitions that will be used. Only used in pyarrow engine.</p> <code>None</code> <code>max_open_files</code> <code>int</code> <p>Limits the maximum number of files that can be left open while writing. If an attempt is made to open too many files then the least recently used file will be closed. If this setting is set too low you may end up fragmenting your data into many small files. Only used in pyarrow engine.</p> <code>1024</code> <code>max_rows_per_file</code> <code>int</code> <p>Maximum number of rows per file. If greater than 0 then this will limit how many rows are placed in any single file. Otherwise there will be no limit and one file will be created in each output directory unless files need to be closed to respect max_open_files min_rows_per_group: Minimum number of rows per group. When the value is set, the dataset writer will batch incoming data and only write the row groups to the disk when sufficient rows have accumulated. Only used in pyarrow engine.</p> <code>10 * 1024 * 1024</code> <code>max_rows_per_group</code> <code>int</code> <p>Maximum number of rows per group. If the value is set, then the dataset writer may split up large incoming batches into multiple row groups. 
If this value is set, then min_rows_per_group should also be set.</p> <code>128 * 1024</code> <code>name</code> <code>Optional[str]</code> <p>User-provided identifier for this table.</p> <code>None</code> <code>description</code> <code>Optional[str]</code> <p>User-provided description for this table.</p> <code>None</code> <code>configuration</code> <code>Optional[Mapping[str, Optional[str]]]</code> <p>A map containing configuration options for the metadata action.</p> <code>None</code> <code>overwrite_schema</code> <code>bool</code> <p>If True, allows updating the schema of the table.</p> <code>False</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>options passed to the native delta filesystem. Unused if 'filesystem' is defined.</p> <code>None</code> <code>predicate</code> <code>Optional[str]</code> <p>When using <code>Overwrite</code> mode, replace data that matches a predicate. Only used in rust engine.</p> <code>None</code> <code>partition_filters</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>the partition filters that will be used for partition overwrite. Only used in pyarrow engine.</p> <code>None</code> <code>large_dtypes</code> <code>bool</code> <p>If True, the data schema is kept in large_dtypes, has no effect on pandas dataframe input.</p> <code>False</code> <code>engine</code> <code>Literal['pyarrow', 'rust']</code> <p>writer engine to write the delta table. 
<code>Rust</code> engine is still experimental but you may see up to 4x performance improvements over pyarrow.</p> <code>'pyarrow'</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>Custom metadata to add to the commitInfo.</p> <code>None</code>","boost":10},{"location":"api/delta_writer/#deltalake.WriterProperties","title":"deltalake.WriterProperties  <code>dataclass</code>","text":"<pre><code>WriterProperties(data_page_size_limit: Optional[int] = None, dictionary_page_size_limit: Optional[int] = None, data_page_row_count_limit: Optional[int] = None, write_batch_size: Optional[int] = None, max_row_group_size: Optional[int] = None, compression: Optional[Literal['UNCOMPRESSED', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD', 'LZ4_RAW']] = None, compression_level: Optional[int] = None)\n</code></pre> <p>A Writer Properties instance for the Rust parquet writer.</p> <p>Create a Writer Properties instance for the Rust parquet writer:</p> <p>Parameters:</p> Name Type Description Default <code>data_page_size_limit</code> <code>Optional[int]</code> <p>Limit DataPage size to this in bytes.</p> <code>None</code> <code>dictionary_page_size_limit</code> <code>Optional[int]</code> <p>Limit the size of each DataPage to store dicts to this amount in bytes.</p> <code>None</code> <code>data_page_row_count_limit</code> <code>Optional[int]</code> <p>Limit the number of rows in each DataPage.</p> <code>None</code> <code>write_batch_size</code> <code>Optional[int]</code> <p>Splits internally to smaller batch size.</p> <code>None</code> <code>max_row_group_size</code> <code>Optional[int]</code> <p>Max number of rows in row group.</p> <code>None</code> <code>compression</code> <code>Optional[Literal['UNCOMPRESSED', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD', 'LZ4_RAW']]</code> <p>compression type.</p> <code>None</code> 
<code>compression_level</code> <code>Optional[int]</code> <p>If none and compression has a level, the default level will be used, only relevant for GZIP: levels (1-9), BROTLI: levels (1-11), ZSTD: levels (1-22),</p> <code>None</code>","boost":10},{"location":"api/delta_writer/#convert-to-delta-tables","title":"Convert to Delta Tables","text":"","boost":10},{"location":"api/delta_writer/#deltalake.convert_to_deltalake","title":"deltalake.convert_to_deltalake","text":"<pre><code>convert_to_deltalake(uri: Union[str, Path], mode: Literal['error', 'ignore'] = 'error', partition_by: Optional[pa.Schema] = None, partition_strategy: Optional[Literal['hive']] = None, name: Optional[str] = None, description: Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, storage_options: Optional[Dict[str, str]] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; None\n</code></pre> <p><code>Convert</code> parquet tables <code>to delta</code> tables.</p> <p>Currently only HIVE partitioned tables are supported. <code>Convert to delta</code> creates a transaction log commit with add actions, and additional properties provided such as configuration, name, and description.</p> <p>Parameters:</p> Name Type Description Default <code>uri</code> <code>Union[str, Path]</code> <p>URI of a table.</p> required <code>partition_by</code> <code>Optional[Schema]</code> <p>Optional partitioning schema if table is partitioned.</p> <code>None</code> <code>partition_strategy</code> <code>Optional[Literal['hive']]</code> <p>Optional partition strategy to read and convert</p> <code>None</code> <code>mode</code> <code>Literal['error', 'ignore']</code> <p>How to handle existing data. Default is to error if table already exists. 
If 'ignore', will not convert anything if table already exists.</p> <code>'error'</code> <code>name</code> <code>Optional[str]</code> <p>User-provided identifier for this table.</p> <code>None</code> <code>description</code> <code>Optional[str]</code> <p>User-provided description for this table.</p> <code>None</code> <code>configuration</code> <code>Optional[Mapping[str, Optional[str]]]</code> <p>A map containing configuration options for the metadata action.</p> <code>None</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>options passed to the native delta filesystem. Unused if 'filesystem' is defined.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit</p> <code>None</code>","boost":10},{"location":"api/exceptions/","title":"Exceptions","text":"","boost":2},{"location":"api/exceptions/#deltalake.exceptions.DeltaError","title":"deltalake.exceptions.DeltaError","text":"<p>             Bases: <code>builtins.Exception</code></p> <p>The base class for Delta-specific errors.</p>","boost":2},{"location":"api/exceptions/#deltalake.exceptions.DeltaProtocolError","title":"deltalake.exceptions.DeltaProtocolError","text":"<p>             Bases: <code>_internal.DeltaError</code></p> <p>Raised when a violation with the Delta protocol specs ocurred.</p>","boost":2},{"location":"api/exceptions/#deltalake.exceptions.TableNotFoundError","title":"deltalake.exceptions.TableNotFoundError","text":"<p>             Bases: <code>_internal.DeltaError</code></p> <p>Raised when a Delta table cannot be loaded from a location.</p>","boost":2},{"location":"api/exceptions/#deltalake.exceptions.CommitFailedError","title":"deltalake.exceptions.CommitFailedError","text":"<p>             Bases: <code>_internal.DeltaError</code></p> <p>Raised when a commit to a Delta table 
fails.</p>","boost":2},{"location":"api/schema/","title":"Schema","text":"","boost":2},{"location":"api/schema/#schema-and-field","title":"Schema and field","text":"<p>Schemas, fields, and data types are provided in the <code>deltalake.schema</code> submodule.</p>","boost":2},{"location":"api/schema/#deltalake.Schema","title":"deltalake.Schema","text":"<pre><code>Schema(fields: List[Field])\n</code></pre> <p>             Bases: <code>deltalake._internal.StructType</code></p> <p>A Delta Lake schema</p> <p>Create using a list of :class:<code>Field</code>:</p> <p>Schema([Field(\"x\", \"integer\"), Field(\"y\", \"string\")]) Schema([Field(x, PrimitiveType(\"integer\"), nullable=True), Field(y, PrimitiveType(\"string\"), nullable=True)])</p> <p>Or create from a PyArrow schema:</p> <p>import pyarrow as pa Schema.from_pyarrow(pa.schema({\"x\": pa.int32(), \"y\": pa.string()})) Schema([Field(x, PrimitiveType(\"integer\"), nullable=True), Field(y, PrimitiveType(\"string\"), nullable=True)])</p>","boost":2},{"location":"api/schema/#deltalake.Schema.invariants","title":"invariants","text":"<pre><code>invariants: List[Tuple[str, str]] = &lt;attribute 'invariants' of 'deltalake._internal.Schema' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Schema.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(schema_json) -&gt; Schema\n</code></pre> <p>Create a new Schema from a JSON string.</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required Example <p>A schema has the same JSON format as a StructType. 
<pre><code>Schema.from_json('''{\n    \"type\": \"struct\",\n    \"fields\": [{\"name\": \"x\", \"type\": \"integer\", \"nullable\": true, \"metadata\": {}}]\n    }\n)'''\n# Returns Schema([Field(x, PrimitiveType(\"integer\"), nullable=True)])\n</code></pre></p>","boost":2},{"location":"api/schema/#deltalake.Schema.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; Schema\n</code></pre> <p>Create a Schema from a PyArrow Schema type</p> <p>Will raise <code>TypeError</code> if the PyArrow type is not a primitive type.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>Schema</code> <p>A PyArrow Schema</p> required <p>Returns:</p> Type Description <code>Schema</code> <p>a Schema</p>","boost":2},{"location":"api/schema/#deltalake.Schema.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the JSON string representation of the Schema.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p> Example <p>A schema has the same JSON format as a StructType. <pre><code>Schema([Field(\"x\", \"integer\")]).to_json()\n# Returns '{\"type\":\"struct\",\"fields\":[{\"name\":\"x\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}'\n</code></pre></p>","boost":2},{"location":"api/schema/#deltalake.Schema.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow(as_large_types: bool = False) -&gt; pyarrow.Schema\n</code></pre> <p>Return equivalent PyArrow schema</p> <p>Parameters:</p> Name Type Description Default <code>as_large_types</code> <code>bool</code> <p>get schema with all variable size types (list, binary, string) as large variants (with int64 indices). 
This is for compatibility with systems like Polars that only support the large versions of Arrow types.</p> <code>False</code> <p>Returns:</p> Type Description <code>Schema</code> <p>a PyArrow Schema</p>","boost":2},{"location":"api/schema/#deltalake.Field","title":"deltalake.Field","text":"<pre><code>Field(name: str, type: DataType, *, nullable: bool = True, metadata: Optional[Dict[str, Any]] = None)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.metadata","title":"metadata","text":"<pre><code>metadata: Dict[str, Any] = &lt;attribute 'metadata' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.name","title":"name","text":"<pre><code>name: str = &lt;attribute 'name' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.nullable","title":"nullable","text":"<pre><code>nullable: bool = &lt;attribute 'nullable' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.type","title":"type","text":"<pre><code>type: DataType = &lt;attribute 'type' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(field_json) -&gt; Field\n</code></pre> <p>Create a Field from a JSON string.</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>the JSON string.</p> required <p>Returns:</p> Type Description <code>Field</code> <p>Field</p> Example <pre><code>Field.from_json('''{\n        \"name\": \"col\",\n        \"type\": \"integer\",\n        \"nullable\": true,\n        \"metadata\": {}\n    }'''\n)\n# Returns Field(col, PrimitiveType(\"integer\"), nullable=True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.from_pyarrow","title":"from_pyarrow  
<code>staticmethod</code>","text":"<pre><code>from_pyarrow(field: pyarrow.Field) -&gt; Field\n</code></pre> <p>Create a Field from a PyArrow field Note: This currently doesn't preserve field metadata.</p> <p>Parameters:</p> Name Type Description Default <code>field</code> <code>Field</code> <p>a PyArrow Field</p> required <p>Returns:</p> Type Description <code>Field</code> <p>a Field</p>","boost":2},{"location":"api/schema/#deltalake.Field.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the field as JSON string.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p> Example <pre><code>Field(\"col\", \"integer\").to_json()\n# Returns '{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}'\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.Field\n</code></pre> <p>Convert to an equivalent PyArrow field Note: This currently doesn't preserve field metadata.</p> <p>Returns:</p> Type Description <code>Field</code> <p>a pyarrow Field</p>","boost":2},{"location":"api/schema/#data-types","title":"Data types","text":"","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType","title":"deltalake.schema.PrimitiveType","text":"<pre><code>PrimitiveType(data_type: str)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.type","title":"type","text":"<pre><code>type: str = &lt;attribute 'type' of 'deltalake._internal.PrimitiveType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; PrimitiveType\n</code></pre> <p>Create a PrimitiveType from a JSON string</p> <p>The JSON representation for a primitive type is just a quoted string: 
<code>PrimitiveType.from_json('\"integer\"')</code></p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>PrimitiveType</code> <p>a PrimitiveType type</p>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; PrimitiveType\n</code></pre> <p>Create a PrimitiveType from a PyArrow datatype</p> <p>Will raise <code>TypeError</code> if the PyArrow type is not a primitive type.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>DataType</code> <p>A PyArrow DataType</p> required <p>Returns:</p> Type Description <code>PrimitiveType</code> <p>a PrimitiveType</p>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.DataType\n</code></pre> <p>Get the equivalent PyArrow type (pyarrow.DataType)</p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType","title":"deltalake.schema.ArrayType","text":"<pre><code>ArrayType(element_type: DataType, *, contains_null: bool = True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.contains_null","title":"contains_null","text":"<pre><code>contains_null: bool = &lt;attribute 'contains_null' of 'deltalake._internal.ArrayType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.element_type","title":"element_type","text":"<pre><code>element_type: DataType = &lt;attribute 'element_type' of 'deltalake._internal.ArrayType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.type","title":"type","text":"<pre><code>type: Literal['array'] = &lt;attribute 'type' of 'deltalake._internal.ArrayType' 
objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; ArrayType\n</code></pre> <p>Create an ArrayType from a JSON string</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>ArrayType</code> <p>an ArrayType</p> Example <p>The JSON representation for an array type is an object with <code>type</code> (set to <code>\"array\"</code>), <code>elementType</code>, and <code>containsNull</code>. <pre><code>ArrayType.from_json(\n    '''{\n        \"type\": \"array\",\n        \"elementType\": \"integer\",\n        \"containsNull\": false\n    }'''\n)\n# Returns ArrayType(PrimitiveType(\"integer\"), contains_null=False)\n</code></pre></p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; ArrayType\n</code></pre> <p>Create an ArrayType from a pyarrow.ListType.</p> <p>Will raise <code>TypeError</code> if a different PyArrow DataType is provided.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>ListType</code> <p>The PyArrow ListType</p> required <p>Returns:</p> Type Description <code>ArrayType</code> <p>an ArrayType</p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the JSON string representation of the type.</p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.ListType\n</code></pre> <p>Get the equivalent PyArrow 
type.</p>","boost":2},{"location":"api/schema/#deltalake.schema.MapType","title":"deltalake.schema.MapType","text":"<pre><code>MapType(key_type: DataType, value_type: DataType, *, value_contains_null: bool = True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.key_type","title":"key_type","text":"<pre><code>key_type: DataType = &lt;attribute 'key_type' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.type","title":"type","text":"<pre><code>type: Literal['map'] = &lt;attribute 'type' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.value_contains_null","title":"value_contains_null","text":"<pre><code>value_contains_null: bool = &lt;attribute 'value_contains_null' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.value_type","title":"value_type","text":"<pre><code>value_type: DataType = &lt;attribute 'value_type' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; MapType\n</code></pre> <p>Create a MapType from a JSON string</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>MapType</code> <p>an ArrayType</p> Example <p>The JSON representation for a map type is an object with <code>type</code> (set to <code>map</code>), <code>keyType</code>, <code>valueType</code>, and <code>valueContainsNull</code>:</p> <pre><code>MapType.from_json(\n    '''{\n        \"type\": \"map\",\n        \"keyType\": \"integer\",\n        \"valueType\": \"string\",\n        \"valueContainsNull\": true\n    }'''\n)\n# Returns MapType(PrimitiveType(\"integer\"), 
PrimitiveType(\"string\"), value_contains_null=True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; MapType\n</code></pre> <p>Create a MapType from a PyArrow MapType.</p> <p>Will raise <code>TypeError</code> if passed a different type.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>MapType</code> <p>the PyArrow MapType</p> required <p>Returns:</p> Type Description <code>MapType</code> <p>a MapType</p>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get JSON string representation of map type.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.MapType\n</code></pre> <p>Get the equivalent PyArrow data type.</p>","boost":2},{"location":"api/schema/#deltalake.schema.StructType","title":"deltalake.schema.StructType","text":"<pre><code>StructType(fields: List[Field])\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.fields","title":"fields","text":"<pre><code>fields: List[Field] = &lt;attribute 'fields' of 'deltalake._internal.StructType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.type","title":"type","text":"<pre><code>type: Literal['struct'] = &lt;attribute 'type' of 'deltalake._internal.StructType' objects&gt;\n</code></pre> <p>The string \"struct\"</p>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; StructType\n</code></pre> <p>Create a new StructType from a JSON string.</p> 
<p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>StructType</code> <p>a StructType</p> Example <pre><code>StructType.from_json(\n    '''{\n        \"type\": \"struct\",\n        \"fields\": [{\"name\": \"x\", \"type\": \"integer\", \"nullable\": true, \"metadata\": {}}]\n    }'''\n)\n# Returns StructType([Field(x, PrimitiveType(\"integer\"), nullable=True)])\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; StructType\n</code></pre> <p>Create a new StructType from a PyArrow struct type.</p> <p>Will raise <code>TypeError</code> if a different data type is provided.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>StructType</code> <p>a PyArrow struct type.</p> required <p>Returns:</p> Type Description <code>StructType</code> <p>a StructType</p>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the JSON representation of the type.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p> Example <pre><code>StructType([Field(\"x\", \"integer\")]).to_json()\n# Returns '{\"type\":\"struct\",\"fields\":[{\"name\":\"x\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}'\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.StructType\n</code></pre> <p>Get the equivalent PyArrow StructType</p> <p>Returns:</p> Type Description <code>StructType</code> <p>a PyArrow StructType</p>","boost":2},{"location":"api/storage/","title":"Storage","text":"<p>The delta filesystem handler for the pyarrow engine 
writer.</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler","title":"deltalake.fs.DeltaStorageHandler","text":"<pre><code>DeltaStorageHandler(root: str, options: dict[str, str] | None = None, known_sizes: dict[str, int] | None = None)\n</code></pre> <p>             Bases: <code>DeltaFileSystemHandler</code>, <code>FileSystemHandler</code></p> <p>DeltaStorageHandler is a concrete implementation of a PyArrow FileSystemHandler.</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.get_file_info_selector","title":"get_file_info_selector","text":"<pre><code>get_file_info_selector(selector: FileSelector) -&gt; List[FileInfo]\n</code></pre> <p>Get info for the files defined by FileSelector.</p> <p>Parameters:</p> Name Type Description Default <code>selector</code> <code>FileSelector</code> <p>FileSelector object</p> required <p>Returns:</p> Type Description <code>List[FileInfo]</code> <p>list of file info objects</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.open_input_file","title":"open_input_file","text":"<pre><code>open_input_file(path: str) -&gt; pa.PythonFile\n</code></pre> <p>Open an input file for random access reading.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>str</code> <p>The source to open for reading.</p> required <p>Returns:</p> Type Description <code>PythonFile</code> <p>NativeFile</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.open_input_stream","title":"open_input_stream","text":"<pre><code>open_input_stream(path: str) -&gt; pa.PythonFile\n</code></pre> <p>Open an input stream for sequential reading.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>str</code> <p>The source to open for reading.</p> required <p>Returns:</p> Type Description <code>PythonFile</code> 
<p>NativeFile</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.open_output_stream","title":"open_output_stream","text":"<pre><code>open_output_stream(path: str, metadata: Optional[Dict[str, str]] = None) -&gt; pa.PythonFile\n</code></pre> <p>Open an output stream for sequential writing.</p> <p>If the target already exists, existing data is truncated.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>str</code> <p>The source to open for writing.</p> required <code>metadata</code> <code>Optional[Dict[str, str]]</code> <p>If not None, a mapping of string keys to string values.</p> <code>None</code> <p>Returns:</p> Type Description <code>PythonFile</code> <p>NativeFile</p>","boost":2},{"location":"api/delta_table/","title":"DeltaTable","text":"","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable","title":"deltalake.DeltaTable  <code>dataclass</code>","text":"<pre><code>DeltaTable(table_uri: Union[str, Path, os.PathLike[str]], version: Optional[int] = None, storage_options: Optional[Dict[str, str]] = None, without_files: bool = False, log_buffer_size: Optional[int] = None)\n</code></pre> <p>Represents a Delta Table</p> <p>Create the Delta Table from a path with an optional version. Multiple StorageBackends are currently supported: AWS S3, Azure Data Lake Storage Gen2, Google Cloud Storage (GCS) and local URI. 
Depending on the storage backend used, you could provide options values using the <code>storage_options</code> parameter.</p> <p>Parameters:</p> Name Type Description Default <code>table_uri</code> <code>Union[str, Path, PathLike[str]]</code> <p>the path of the DeltaTable</p> required <code>version</code> <code>Optional[int]</code> <p>version of the DeltaTable</p> <code>None</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>a dictionary of the options to use for the storage backend</p> <code>None</code> <code>without_files</code> <code>bool</code> <p>If True, will load table without tracking files.                 Some append-only applications might have no need of tracking any files. So, the                 DeltaTable will be loaded with a significant memory reduction.</p> <code>False</code> <code>log_buffer_size</code> <code>Optional[int]</code> <p>Number of files to buffer when reading the commit log. A positive integer.                 Setting a value greater than 1 results in concurrent calls to the storage api.                 This can decrease latency if there are many files in the log since the last checkpoint,                 but will also increase memory usage. Possible rate limits of the storage backend should                 also be considered for optimal performance. 
Defaults to 4 * number of cpus.</p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.alter","title":"alter  <code>property</code>","text":"<pre><code>alter: TableAlterer\n</code></pre> <p>Namespace for all table alter related methods.</p> <p>Returns:</p> Name Type Description <code>TableAlterer</code> <code>TableAlterer</code> <p>TableAlterer Object</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.optimize","title":"optimize  <code>property</code>","text":"<pre><code>optimize: TableOptimizer\n</code></pre> <p>Namespace for all table optimize related methods.</p> <p>Returns:</p> Name Type Description <code>TableOptimizer</code> <code>TableOptimizer</code> <p>TableOptimizer Object</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.cleanup_metadata","title":"cleanup_metadata","text":"<pre><code>cleanup_metadata() -&gt; None\n</code></pre> <p>Delete expired log files before current version from table. The table log retention is based on the <code>configuration.logRetentionDuration</code> value, 30 days by default.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.create","title":"create  <code>classmethod</code>","text":"<pre><code>create(table_uri: Union[str, Path], schema: Union[pyarrow.Schema, DeltaSchema], mode: Literal['error', 'append', 'overwrite', 'ignore'] = 'error', partition_by: Optional[Union[List[str], str]] = None, name: Optional[str] = None, description: Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, storage_options: Optional[Dict[str, str]] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; DeltaTable\n</code></pre> <p><code>CREATE</code> or <code>CREATE_OR_REPLACE</code> a delta table given a table_uri.</p> <p>Parameters:</p> Name Type Description Default <code>table_uri</code> <code>Union[str, Path]</code> <p>URI of a table</p> required <code>schema</code> <code>Union[Schema, Schema]</code> <p>Table schema</p> required 
<code>mode</code> <code>Literal['error', 'append', 'overwrite', 'ignore']</code> <p>How to handle existing data. Default is to error if table already exists. If 'append', returns a not-supported error if the table exists. If 'overwrite', will <code>CREATE_OR_REPLACE</code> table. If 'ignore', will not do anything if table already exists. Defaults to \"error\".</p> <code>'error'</code> <code>partition_by</code> <code>Optional[Union[List[str], str]]</code> <p>List of columns to partition the table by.</p> <code>None</code> <code>name</code> <code>Optional[str]</code> <p>User-provided identifier for this table.</p> <code>None</code> <code>description</code> <code>Optional[str]</code> <p>User-provided description for this table.</p> <code>None</code> <code>configuration</code> <code>Optional[Mapping[str, Optional[str]]]</code> <p>A map containing configuration options for the metadata action.</p> <code>None</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>options passed to the object store crate.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>DeltaTable</code> <code>DeltaTable</code> <p>created delta table</p> Example <pre><code>import pyarrow as pa\n\nfrom deltalake import DeltaTable\n\ndt = DeltaTable.create(\n    table_uri=\"my_local_table\",\n    schema=pa.schema(\n        [pa.field(\"foo\", pa.string()), pa.field(\"bar\", pa.string())]\n    ),\n    mode=\"error\",\n    partition_by=\"bar\",\n)\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.delete","title":"delete","text":"<pre><code>delete(predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Delete records from a Delta Table that satisfy a predicate.</p> <p>When a 
predicate is not provided then all records are deleted from the Delta Table. Otherwise a scan of the Delta table is performed to mark any files that contain records that satisfy the predicate. Once files are determined they are rewritten without the records.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>a SQL where clause. If not passed, will delete all rows.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from delete.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.file_uris","title":"file_uris","text":"<pre><code>file_uris(partition_filters: Optional[List[Tuple[str, str, Any]]] = None) -&gt; List[str]\n</code></pre> <p>Get the list of files as absolute URIs, including the scheme (e.g. \"s3://\").</p> <p>Local files will be just plain absolute paths, without a scheme. (That is, no 'file://' prefix.)</p> <p>Use the partition_filters parameter to retrieve a subset of files that match the given filters.</p> <p>Parameters:</p> Name Type Description Default <code>partition_filters</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>the partition filters that will be used for getting the matched files</p> <code>None</code> <p>Returns:</p> Type Description <code>List[str]</code> <p>list of the .parquet files with an absolute URI referenced for the current version of the DeltaTable</p> <p>Predicates are expressed in disjunctive normal form (DNF), like [(\"x\", \"=\", \"a\"), ...]. DNF allows arbitrary boolean logical combinations of single partition predicates. The innermost tuples each describe a single partition predicate. 
The list of inner predicates is interpreted as a conjunction (AND), forming a more selective and multiple partition predicates. Each tuple has format: (key, op, value) and compares the key with the value. The supported op are: <code>=</code>, <code>!=</code>, <code>in</code>, and <code>not in</code>. If the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string <code>''</code> for Null partition value.</p> Example <pre><code>(\"x\", \"=\", \"a\")\n(\"x\", \"!=\", \"a\")\n(\"y\", \"in\", [\"a\", \"b\", \"c\"])\n(\"z\", \"not in\", [\"a\",\"b\"])\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.files","title":"files","text":"<pre><code>files(partition_filters: Optional[List[Tuple[str, str, Any]]] = None) -&gt; List[str]\n</code></pre> <p>Get the .parquet files of the DeltaTable.</p> <p>The paths are as they are saved in the delta log, which may either be relative to the table root or absolute URIs.</p> <p>Parameters:</p> Name Type Description Default <code>partition_filters</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>the partition filters that will be used for                 getting the matched files</p> <code>None</code> <p>Returns:</p> Type Description <code>List[str]</code> <p>list of the .parquet files referenced for the current version of the DeltaTable</p> <p>Predicates are expressed in disjunctive normal form (DNF), like [(\"x\", \"=\", \"a\"), ...]. DNF allows arbitrary boolean logical combinations of single partition predicates. The innermost tuples each describe a single partition predicate. The list of inner predicates is interpreted as a conjunction (AND), forming a more selective and multiple partition predicates. Each tuple has format: (key, op, value) and compares the key with the value. The supported op are: <code>=</code>, <code>!=</code>, <code>in</code>, and <code>not in</code>. 
If the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string <code>''</code> for Null partition value.</p> Example <pre><code>(\"x\", \"=\", \"a\")\n(\"x\", \"!=\", \"a\")\n(\"y\", \"in\", [\"a\", \"b\", \"c\"])\n(\"z\", \"not in\", [\"a\",\"b\"])\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.from_data_catalog","title":"from_data_catalog  <code>classmethod</code>","text":"<pre><code>from_data_catalog(data_catalog: DataCatalog, database_name: str, table_name: str, data_catalog_id: Optional[str] = None, version: Optional[int] = None, log_buffer_size: Optional[int] = None) -&gt; DeltaTable\n</code></pre> <p>Create the Delta Table from a Data Catalog.</p> <p>Parameters:</p> Name Type Description Default <code>data_catalog</code> <code>DataCatalog</code> <p>the Catalog to use for getting the storage location of the Delta Table</p> required <code>database_name</code> <code>str</code> <p>the database name inside the Data Catalog</p> required <code>table_name</code> <code>str</code> <p>the table name inside the Data Catalog</p> required <code>data_catalog_id</code> <code>Optional[str]</code> <p>the identifier of the Data Catalog</p> <code>None</code> <code>version</code> <code>Optional[int]</code> <p>version of the DeltaTable</p> <code>None</code> <code>log_buffer_size</code> <code>Optional[int]</code> <p>Number of files to buffer when reading the commit log. A positive integer.                 Setting a value greater than 1 results in concurrent calls to the storage api.                 This can decrease latency if there are many files in the log since the last checkpoint,                 but will also increase memory usage. Possible rate limits of the storage backend should                 also be considered for optimal performance. 
Defaults to 4 * number of cpus.</p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.get_add_actions","title":"get_add_actions","text":"<pre><code>get_add_actions(flatten: bool = False) -&gt; pyarrow.RecordBatch\n</code></pre> <p>Return a dataframe with all current add actions.</p> <p>Add actions represent the files that currently make up the table. This data is a low-level representation parsed from the transaction log.</p> <p>Parameters:</p> Name Type Description Default <code>flatten</code> <code>bool</code> <p>whether to flatten the schema. Partition values columns are         given the prefix <code>partition.</code>, statistics (null_count, min, and max) are         given the prefix <code>null_count.</code>, <code>min.</code>, and <code>max.</code>, and tags the         prefix <code>tags.</code>. Nested field names are concatenated with <code>.</code>.</p> <code>False</code> <p>Returns:</p> Type Description <code>RecordBatch</code> <p>a PyArrow RecordBatch containing the add action data.</p> Example <pre><code>from pprint import pprint\nfrom deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data, partition_by=[\"x\"])\ndt = DeltaTable(\"tmp\")\ndf = dt.get_add_actions().to_pandas()\ndf[\"path\"].sort_values(ignore_index=True)\n0    x=1/0\n1    x=2/0\n2    x=3/0\n</code></pre> <pre><code>df = dt.get_add_actions(flatten=True).to_pandas()\ndf[\"partition.x\"].sort_values(ignore_index=True)\n0    1\n1    2\n2    3\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.history","title":"history","text":"<pre><code>history(limit: Optional[int] = None) -&gt; List[Dict[str, Any]]\n</code></pre> <p>Run the history command on the DeltaTable. 
The operations are returned in reverse chronological order.</p> <p>Parameters:</p> Name Type Description Default <code>limit</code> <code>Optional[int]</code> <p>the commit info limit to return</p> <code>None</code> <p>Returns:</p> Type Description <code>List[Dict[str, Any]]</code> <p>list of the commit infos registered in the transaction log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.load_as_version","title":"load_as_version","text":"<pre><code>load_as_version(version: Union[int, str, datetime]) -&gt; None\n</code></pre> <p>Load/time travel a DeltaTable to a specified version number, or a timestamp version of the table. If a string is passed then the argument should be an RFC 3339 and ISO 8601 date and time string format.</p> <p>Parameters:</p> Name Type Description Default <code>version</code> <code>Union[int, str, datetime]</code> <p>the identifier of the version of the DeltaTable to load</p> required Example <p>Use a version number <pre><code>dt = DeltaTable(\"test_table\")\ndt.load_as_version(1)\n</code></pre></p> <p>Use a datetime object <pre><code>dt.load_as_version(datetime(2023,1,1))\n</code></pre></p> <p>Use a datetime in string format <pre><code>dt.load_as_version(\"2018-01-26T18:30:09Z\")\ndt.load_as_version(\"2018-12-19T16:39:57-08:00\")\ndt.load_as_version(\"2018-01-26T18:30:09.453+00:00\")\n</code></pre></p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.load_version","title":"load_version","text":"<pre><code>load_version(version: int) -&gt; None\n</code></pre> <p>Load a DeltaTable with a specified version.</p> <p>Deprecated</p> <p>Load_version and load_with_datetime have been combined into <code>DeltaTable.load_as_version</code>.</p> <p>Parameters:</p> Name Type Description Default <code>version</code> <code>int</code> <p>the identifier of the version of the DeltaTable to load</p> 
required","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.load_with_datetime","title":"load_with_datetime","text":"<pre><code>load_with_datetime(datetime_string: str) -&gt; None\n</code></pre> <p>Time travel Delta table to the latest version that's created at or before provided <code>datetime_string</code> argument. The <code>datetime_string</code> argument should be an RFC 3339 and ISO 8601 date and time string.</p> <p>Deprecated</p> <p>Load_version and load_with_datetime have been combined into <code>DeltaTable.load_as_version</code>.</p> <p>Parameters:</p> Name Type Description Default <code>datetime_string</code> <code>str</code> <p>the identifier of the datetime point of the DeltaTable to load</p> required Example <pre><code>\"2018-01-26T18:30:09Z\"\n\"2018-12-19T16:39:57-08:00\"\n\"2018-01-26T18:30:09.453+00:00\"\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.merge","title":"merge","text":"<pre><code>merge(source: Union[pyarrow.Table, pyarrow.RecordBatch, pyarrow.RecordBatchReader, ds.Dataset, pandas.DataFrame], predicate: str, source_alias: Optional[str] = None, target_alias: Optional[str] = None, error_on_type_mismatch: bool = True, writer_properties: Optional[WriterProperties] = None, large_dtypes: bool = True, custom_metadata: Optional[Dict[str, str]] = None) -&gt; TableMerger\n</code></pre> <p>Pass the source data which you want to merge on the target delta table, providing a predicate in SQL query like format. 
You can also specify on what to do when the underlying data types do not match the underlying table.</p> <p>Parameters:</p> Name Type Description Default <code>source</code> <code>Union[Table, RecordBatch, RecordBatchReader, Dataset, DataFrame]</code> <p>source data</p> required <code>predicate</code> <code>str</code> <p>SQL like predicate on how to merge</p> required <code>source_alias</code> <code>Optional[str]</code> <p>Alias for the source table</p> <code>None</code> <code>target_alias</code> <code>Optional[str]</code> <p>Alias for the target table</p> <code>None</code> <code>error_on_type_mismatch</code> <code>bool</code> <p>specify if merge will return error if data types are mismatching :default = True</p> <code>True</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer</p> <code>None</code> <code>large_dtypes</code> <code>bool</code> <p>If True, the data schema is kept in large_dtypes.</p> <code>True</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.metadata","title":"metadata","text":"<pre><code>metadata() -&gt; Metadata\n</code></pre> <p>Get the current metadata of the DeltaTable.</p> <p>Returns:</p> Type Description <code>Metadata</code> <p>the current Metadata registered in the transaction log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.protocol","title":"protocol","text":"<pre><code>protocol() -&gt; ProtocolVersions\n</code></pre> <p>Get the reader and writer protocol versions of the DeltaTable.</p> <p>Returns:</p> Type Description <code>ProtocolVersions</code> <p>the current ProtocolVersions registered in the transaction 
log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.repair","title":"repair","text":"<pre><code>repair(dry_run: bool = False, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Repair the Delta Table by auditing active files that do not exist in the underlying filesystem and removes them. This can be useful when there are accidental deletions or corrupted files.</p> <p>Active files are ones that have an add action in the log, but no corresponding remove action. This operation creates a new FSCK transaction containing a remove action for each of the missing or corrupted files.</p> <p>Parameters:</p> Name Type Description Default <code>dry_run</code> <code>bool</code> <p>when activated, list only the files, otherwise add remove actions to transaction log. Defaults to False.</p> <code>False</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:     The metrics from repair (FSCK) action.</p> Example <p><pre><code>from deltalake import DeltaTable\ndt = DeltaTable('TEST')\ndt.repair(dry_run=False)\n</code></pre> Results in <pre><code>{'dry_run': False, 'files_removed': ['6-0d084325-6885-4847-b008-82c1cf30674c-0.parquet', '5-4fba1d3e-3e20-4de1-933d-a8e13ac59f53-0.parquet']}\n</code></pre></p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.restore","title":"restore","text":"<pre><code>restore(target: Union[int, datetime, str], *, ignore_missing_files: bool = False, protocol_downgrade_allowed: bool = False, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Run the Restore command on the Delta Table: restore table to a given version or datetime.</p> <p>Parameters:</p> Name Type Description Default <code>target</code> <code>Union[int, datetime, str]</code> <p>the version to restore to, represented by an int, a date string, or a datetime.</p> 
required <code>ignore_missing_files</code> <code>bool</code> <p>whether the operation carries on when some data files are missing.</p> <code>False</code> <code>protocol_downgrade_allowed</code> <code>bool</code> <p>whether the operation is allowed when the protocol version was upgraded.</p> <code>False</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from restore.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.schema","title":"schema","text":"<pre><code>schema() -&gt; DeltaSchema\n</code></pre> <p>Get the current schema of the DeltaTable.</p> <p>Returns:</p> Type Description <code>Schema</code> <p>the current Schema registered in the transaction log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.to_pandas","title":"to_pandas","text":"<pre><code>to_pandas(partitions: Optional[List[Tuple[str, str, Any]]] = None, columns: Optional[List[str]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, filters: Optional[FilterType] = None) -&gt; pandas.DataFrame\n</code></pre> <p>Build a pandas dataframe using data from the DeltaTable.</p> <p>Parameters:</p> Name Type Description Default <code>partitions</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax</p> <code>None</code> <code>columns</code> <code>Optional[List[str]]</code> <p>The columns to project. This can be a list of column names to include (order and duplicates will be preserved)</p> <code>None</code> <code>filesystem</code> <code>Optional[Union[str, FileSystem]]</code> <p>A concrete implementation of the Pyarrow FileSystem or a fsspec-compatible interface. 
If None, the first file path will be used to determine the right FileSystem</p> <code>None</code> <code>filters</code> <code>Optional[FilterType]</code> <p>A disjunctive normal form (DNF) predicate for filtering rows. If you pass a filter you do not need to pass <code>partitions</code></p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.to_pyarrow_dataset","title":"to_pyarrow_dataset","text":"<pre><code>to_pyarrow_dataset(partitions: Optional[List[Tuple[str, str, Any]]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, parquet_read_options: Optional[ParquetReadOptions] = None) -&gt; pyarrow.dataset.Dataset\n</code></pre> <p>Build a PyArrow Dataset using data from the DeltaTable.</p> <p>Parameters:</p> Name Type Description Default <code>partitions</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax</p> <code>None</code> <code>filesystem</code> <code>Optional[Union[str, FileSystem]]</code> <p>A concrete implementation of the Pyarrow FileSystem or a fsspec-compatible interface. If None, the first file path will be used to determine the right FileSystem</p> <code>None</code> <code>parquet_read_options</code> <code>Optional[ParquetReadOptions]</code> <p>Optional read options for Parquet. 
Use this to handle INT96 to timestamp conversion for edge cases like 0001-01-01 or 9999-12-31</p> <code>None</code> <p>More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.ParquetReadOptions.html</p> <p>Returns:</p> Type Description <code>Dataset</code> <p>the PyArrow dataset in PyArrow</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.to_pyarrow_table","title":"to_pyarrow_table","text":"<pre><code>to_pyarrow_table(partitions: Optional[List[Tuple[str, str, Any]]] = None, columns: Optional[List[str]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, filters: Optional[FilterType] = None) -&gt; pyarrow.Table\n</code></pre> <p>Build a PyArrow Table using data from the DeltaTable.</p> <p>Parameters:</p> Name Type Description Default <code>partitions</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax</p> <code>None</code> <code>columns</code> <code>Optional[List[str]]</code> <p>The columns to project. This can be a list of column names to include (order and duplicates will be preserved)</p> <code>None</code> <code>filesystem</code> <code>Optional[Union[str, FileSystem]]</code> <p>A concrete implementation of the Pyarrow FileSystem or a fsspec-compatible interface. If None, the first file path will be used to determine the right FileSystem</p> <code>None</code> <code>filters</code> <code>Optional[FilterType]</code> <p>A disjunctive normal form (DNF) predicate for filtering rows. 
If you pass a filter you do not need to pass <code>partitions</code></p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.update","title":"update","text":"<pre><code>update(updates: Optional[Dict[str, str]] = None, new_values: Optional[Dict[str, Union[int, float, str, datetime, bool, List[Any]]]] = None, predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, error_on_type_mismatch: bool = True, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p><code>UPDATE</code> records in the Delta Table that matches an optional predicate. Either updates or new_values needs to be passed for it to execute.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>Optional[Dict[str, str]]</code> <p>a mapping of column name to update SQL expression.</p> <code>None</code> <code>new_values</code> <code>Optional[Dict[str, Union[int, float, str, datetime, bool, List[Any]]]]</code> <p>a mapping of column name to python datatype.</p> <code>None</code> <code>predicate</code> <code>Optional[str]</code> <p>a logical expression.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>error_on_type_mismatch</code> <code>bool</code> <p>specify if update will return error if data types are mismatching :default = True</p> <code>True</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:     the metrics from update</p> Example <p>Update some row values with SQL predicate</p> <p>This is equivalent to <code>UPDATE table SET deleted = true WHERE id = '3'</code> <pre><code>from deltalake import write_deltalake, DeltaTable\nimport pandas as pd\ndf = pd.DataFrame(\n    {\"id\": [\"1\", \"2\", \"3\"],\n    \"deleted\": [False, False, False],\n    
\"price\": [10., 15., 20.]\n    })\nwrite_deltalake(\"tmp\", df)\ndt = DeltaTable(\"tmp\")\ndt.update(predicate=\"id = '3'\", updates = {\"deleted\": 'True'})\n\n{'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...}\n</code></pre></p> <p>Update all row values</p> <p>This is equivalent to <code>UPDATE table SET deleted = true, id = concat(id, '_old')</code>. <pre><code>dt.update(updates = {\"deleted\": 'True', \"id\": \"concat(id, '_old')\"})\n\n{'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 3, 'num_copied_rows': 0, 'execution_time_ms': ..., 'scan_time_ms': ...}\n</code></pre></p> <p>Use Python objects instead of SQL strings</p> <p>Use the <code>new_values</code> parameter instead of the <code>updates</code> parameter. For example, this is equivalent to <code>UPDATE table SET price = 150.10 WHERE id = '1'</code> <pre><code>dt.update(predicate=\"id = '1_old'\", new_values = {\"price\": 150.10})\n\n{'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...}\n</code></pre></p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.update_incremental","title":"update_incremental","text":"<pre><code>update_incremental() -&gt; None\n</code></pre> <p>Updates the DeltaTable to the latest version by incrementally applying newer versions.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.vacuum","title":"vacuum","text":"<pre><code>vacuum(retention_hours: Optional[int] = None, dry_run: bool = True, enforce_retention_duration: bool = True, custom_metadata: Optional[Dict[str, str]] = None) -&gt; List[str]\n</code></pre> <p>Run the Vacuum command on the Delta Table: list and delete files no longer referenced by the Delta table and are older than the retention threshold.</p> <p>Parameters:</p> Name Type Description Default <code>retention_hours</code> 
<code>Optional[int]</code> <p>the retention threshold in hours, if none then the value from <code>configuration.deletedFileRetentionDuration</code> is used or default of 1 week otherwise.</p> <code>None</code> <code>dry_run</code> <code>bool</code> <p>when activated, list only the files, delete otherwise</p> <code>True</code> <code>enforce_retention_duration</code> <code>bool</code> <p>when disabled, accepts retention hours smaller than the value from <code>configuration.deletedFileRetentionDuration</code>.</p> <code>True</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:     the list of files no longer referenced by the Delta Table and are older than the retention threshold.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.version","title":"version","text":"<pre><code>version() -&gt; int\n</code></pre> <p>Get the version of the DeltaTable.</p> <p>Returns:</p> Type Description <code>int</code> <p>The current version of the DeltaTable</p>","boost":2},{"location":"api/delta_table/delta_table_alterer/","title":"TableAlterer","text":"","boost":10},{"location":"api/delta_table/delta_table_alterer/#deltalake.table.TableAlterer","title":"deltalake.table.TableAlterer","text":"<pre><code>TableAlterer(table: DeltaTable)\n</code></pre> <p>API for various table alteration commands.</p>","boost":10},{"location":"api/delta_table/delta_table_alterer/#deltalake.table.TableAlterer.add_constraint","title":"add_constraint","text":"<pre><code>add_constraint(constraints: Dict[str, str], custom_metadata: Optional[Dict[str, str]] = None) -&gt; None\n</code></pre> <p>Add constraints to the table. 
Limited to <code>single constraint</code> at once.</p> <p>Parameters:</p> Name Type Description Default <code>constraints</code> <code>Dict[str, str]</code> <p>mapping of constraint name to SQL-expression to evaluate on write</p> required <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Example:     <pre><code>from deltalake import DeltaTable\ndt = DeltaTable(\"test_table_constraints\")\ndt.alter.add_constraint({\n    \"value_gt_5\": \"value &gt; 5\",\n})\n</code></pre></p> <pre><code>**Check configuration**\n```\ndt.metadata().configuration\n{'delta.constraints.value_gt_5': 'value &gt; 5'}\n```\n</code></pre>","boost":10},{"location":"api/delta_table/delta_table_merger/","title":"TableMerger","text":"","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger","title":"deltalake.table.TableMerger","text":"<pre><code>TableMerger(table: DeltaTable, source: pyarrow.RecordBatchReader, predicate: str, source_alias: Optional[str] = None, target_alias: Optional[str] = None, safe_cast: bool = True, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None)\n</code></pre> <p>API for various table <code>MERGE</code> commands.</p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.execute","title":"execute","text":"<pre><code>execute() -&gt; Dict[str, Any]\n</code></pre> <p>Executes <code>MERGE</code> with the previously provided settings in Rust with Apache Datafusion query engine.</p> <p>Returns:</p> Name Type Description <code>Dict</code> <code>Dict[str, Any]</code> <p>metrics</p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_matched_delete","title":"when_matched_delete","text":"<pre><code>when_matched_delete(predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Delete a matched row from the 
table only if the given <code>predicate</code> (if specified) is true for the matched row. If not specified it deletes all matches.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>(str | None, Optional)</code> <p>SQL like predicate on when to delete.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <p>Delete on a predicate</p> <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [2, 3], \"deleted\": [False, True]})\n\n(\n    dt.merge(\n        source=new_data,\n        predicate='target.x = source.x',\n        source_alias='source',\n        target_alias='target')\n    .when_matched_delete(\n        predicate=\"source.deleted = true\")\n    .execute()\n)\n{'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 2, 'num_output_rows': 2, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  4\n1  2  5\n</code></pre> <p>Delete all records that were matched <pre><code>dt = DeltaTable(\"tmp\")\n(\n    dt.merge(\n        source=new_data,\n        predicate='target.x = source.x',\n        source_alias='source',\n        target_alias='target')\n    .when_matched_delete()\n    .execute()\n)\n{'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 1, 'num_output_rows': 1, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas()\n   x  y\n0  1  
4\n</code></pre></p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_matched_update","title":"when_matched_update","text":"<pre><code>when_matched_update(updates: Dict[str, str], predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Update a matched table row based on the rules defined by <code>updates</code>. If a <code>predicate</code> is specified, then it must evaluate to true for the row to be updated.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>Dict[str, str]</code> <p>a mapping of column name to update SQL expression.</p> required <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to update.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [1], \"y\": [7]})\n\n(\n     dt.merge(\n         source=new_data,\n         predicate=\"target.x = source.x\",\n         source_alias=\"source\",\n         target_alias=\"target\")\n     .when_matched_update(updates={\"x\": \"source.x\", \"y\": \"source.y\"})\n     .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas()\n   x  y\n0  1  7\n1  2  5\n2  3  6\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_matched_update_all","title":"when_matched_update_all","text":"<pre><code>when_matched_update_all(predicate: Optional[str] = None) -&gt; 
TableMerger\n</code></pre> <p>Updating all source fields to target fields, source and target are required to have the same field names. If a <code>predicate</code> is specified, then it must evaluate to true for the row to be updated.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to update all columns.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [1], \"y\": [7]})\n\n(\n    dt.merge(\n        source=new_data,\n        predicate=\"target.x = source.x\",\n        source_alias=\"source\",\n        target_alias=\"target\")\n    .when_matched_update_all()\n    .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas()\n   x  y\n0  1  7\n1  2  5\n2  3  6\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_by_source_delete","title":"when_not_matched_by_source_delete","text":"<pre><code>when_not_matched_by_source_delete(predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Delete a target row that has no matches in the source from the table only if the given <code>predicate</code> (if specified) is true for the target row.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to delete when not matched by source.</p> <code>None</code> 
<p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_by_source_update","title":"when_not_matched_by_source_update","text":"<pre><code>when_not_matched_by_source_update(updates: Dict[str, str], predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Update a target row that has no matches in the source based on the rules defined by <code>updates</code>. If a <code>predicate</code> is specified, then it must evaluate to true for the row to be updated.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>Dict[str, str]</code> <p>a mapping of column name to update SQL expression.</p> required <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to update.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [2, 3, 4]})\n\n(\n   dt.merge(\n       source=new_data,\n       predicate='target.x = source.x',\n       source_alias='source',\n       target_alias='target')\n   .when_not_matched_by_source_update(\n       predicate = \"y &gt; 3\",\n       updates = {\"y\": \"0\"})\n   .execute()\n)\n{'num_source_rows': 3, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  0\n1  2  5\n2  3  
6\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_insert","title":"when_not_matched_insert","text":"<pre><code>when_not_matched_insert(updates: Dict[str, str], predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Insert a new row to the target table based on the rules defined by <code>updates</code>. If a <code>predicate</code> is specified, then it must evaluate to true for the new row to be inserted.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>dict</code> <p>a mapping of column name to insert SQL expression.</p> required <code>predicate</code> <code>(str | None, Optional)</code> <p>SQL like predicate on when to insert.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [4], \"y\": [7]})\n\n(\n    dt.merge(\n        source=new_data,\n        predicate=\"target.x = source.x\",\n        source_alias=\"source\",\n        target_alias=\"target\",)\n    .when_not_matched_insert(\n        updates={\n            \"x\": \"source.x\",\n            \"y\": \"source.y\",\n        })\n    .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 1, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 3, 'num_output_rows': 4, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  4\n1  2  5\n2  3  6\n3  4  
7\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_insert_all","title":"when_not_matched_insert_all","text":"<pre><code>when_not_matched_insert_all(predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Insert a new row to the target table, updating all source fields to target fields. Source and target are required to have the same field names. If a <code>predicate</code> is specified, then it must evaluate to true for the new row to be inserted.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to insert.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [4], \"y\": [7]})\n\n(\n   dt.merge(\n       source=new_data,\n       predicate='target.x = source.x',\n       source_alias='source',\n       target_alias='target')\n   .when_not_matched_insert_all()\n   .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 1, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 3, 'num_output_rows': 4, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  4\n1  2  5\n2  3  6\n3  4  7\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.with_writer_properties","title":"with_writer_properties","text":"<pre><code>with_writer_properties(data_page_size_limit: Optional[int] = None, dictionary_page_size_limit: Optional[int] = None, data_page_row_count_limit: 
Optional[int] = None, write_batch_size: Optional[int] = None, max_row_group_size: Optional[int] = None) -&gt; TableMerger\n</code></pre> <p>Deprecated</p> <p>Use <code>.merge(writer_properties = WriterProperties())</code> instead</p> <p>Pass writer properties to the Rust parquet writer, see options https://arrow.apache.org/rust/parquet/file/properties/struct.WriterProperties.html:</p> <p>Parameters:</p> Name Type Description Default <code>data_page_size_limit</code> <code>Optional[int]</code> <p>Limit DataPage size to this in bytes.</p> <code>None</code> <code>dictionary_page_size_limit</code> <code>Optional[int]</code> <p>Limit the size of each DataPage to store dicts to this amount in bytes.</p> <code>None</code> <code>data_page_row_count_limit</code> <code>Optional[int]</code> <p>Limit the number of rows in each DataPage.</p> <code>None</code> <code>write_batch_size</code> <code>Optional[int]</code> <p>Splits internally to smaller batch size.</p> <code>None</code> <code>max_row_group_size</code> <code>Optional[int]</code> <p>Max number of rows in row group.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p>","boost":2},{"location":"api/delta_table/delta_table_optimizer/","title":"TableOptimizer","text":"","boost":10},{"location":"api/delta_table/delta_table_optimizer/#deltalake.table.TableOptimizer","title":"deltalake.table.TableOptimizer","text":"<pre><code>TableOptimizer(table: DeltaTable)\n</code></pre> <p>API for various table optimization commands.</p>","boost":10},{"location":"api/delta_table/delta_table_optimizer/#deltalake.table.TableOptimizer.compact","title":"compact","text":"<pre><code>compact(partition_filters: Optional[FilterType] = None, target_size: Optional[int] = None, max_concurrent_tasks: Optional[int] = None, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, 
str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Compacts small files to reduce the total number of files in the table.</p> <p>This operation is idempotent; if run twice on the same table (assuming it has not been updated) it will do nothing the second time.</p> <p>If this operation happens concurrently with any operations other than append, it will fail.</p> <p>Parameters:</p> Name Type Description Default <code>partition_filters</code> <code>Optional[FilterType]</code> <p>the partition filters that will be used for getting the matched files</p> <code>None</code> <code>target_size</code> <code>Optional[int]</code> <p>desired file size after bin-packing files, in bytes. If not             provided, will attempt to read the table configuration value <code>delta.targetFileSize</code>.             If that value isn't set, will use default value of 256MB.</p> <code>None</code> <code>max_concurrent_tasks</code> <code>Optional[int]</code> <p>the maximum number of concurrent tasks to use for                     file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction                     faster, but will also use more memory.</p> <code>None</code> <code>min_commit_interval</code> <code>Optional[Union[int, timedelta]]</code> <p>minimum interval in seconds or as timedeltas before a new commit is                     created. Interval is useful for long running executions. 
Set to 0 or timedelta(0), if you                     want a commit per partition.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from optimize</p> Example <p>Use a timedelta object to specify the seconds, minutes or hours of the interval. <pre><code>from deltalake import DeltaTable, write_deltalake\nfrom datetime import timedelta\nimport pyarrow as pa\n\nwrite_deltalake(\"tmp\", pa.table({\"x\": [1], \"y\": [4]}))\nwrite_deltalake(\"tmp\", pa.table({\"x\": [2], \"y\": [5]}), mode=\"append\")\n\ndt = DeltaTable(\"tmp\")\ntime_delta = timedelta(minutes=10)\ndt.optimize.compact(min_commit_interval=time_delta)\n{'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 1, 'numBatches': 2, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True}\n</code></pre></p>","boost":10},{"location":"api/delta_table/delta_table_optimizer/#deltalake.table.TableOptimizer.z_order","title":"z_order","text":"<pre><code>z_order(columns: Iterable[str], partition_filters: Optional[FilterType] = None, target_size: Optional[int] = None, max_concurrent_tasks: Optional[int] = None, max_spill_size: int = 20 * 1024 * 1024 * 1024, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Reorders the data using a Z-order curve to improve data skipping.</p> <p>This also performs compaction, so the same parameters as compact() apply.</p> <p>Parameters:</p> Name Type Description Default <code>columns</code> <code>Iterable[str]</code> 
<p>the columns to use for Z-ordering. There must be at least one column.         partition_filters: the partition filters that will be used for getting the matched files</p> required <code>target_size</code> <code>Optional[int]</code> <p>desired file size after bin-packing files, in bytes. If not             provided, will attempt to read the table configuration value <code>delta.targetFileSize</code>.             If that value isn't set, will use default value of 256MB.</p> <code>None</code> <code>max_concurrent_tasks</code> <code>Optional[int]</code> <p>the maximum number of concurrent tasks to use for                     file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction                     faster, but will also use more memory.</p> <code>None</code> <code>max_spill_size</code> <code>int</code> <p>the maximum number of bytes to spill to disk. Defaults to 20GB.</p> <code>20 * 1024 * 1024 * 1024</code> <code>min_commit_interval</code> <code>Optional[Union[int, timedelta]]</code> <p>minimum interval in seconds or as timedeltas before a new commit is                     created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you                     want a commit per partition.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from optimize</p> Example <p>Use a timedelta object to specify the seconds, minutes or hours of the interval. 
<pre><code>from deltalake import DeltaTable, write_deltalake\nfrom datetime import timedelta\nimport pyarrow as pa\n\nwrite_deltalake(\"tmp\", pa.table({\"x\": [1], \"y\": [4]}))\nwrite_deltalake(\"tmp\", pa.table({\"x\": [2], \"y\": [5]}), mode=\"append\")\n\ndt = DeltaTable(\"tmp\")\ntime_delta = timedelta(minutes=10)\ndt.optimize.z_order([\"x\"], min_commit_interval=time_delta)\n{'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 0, 'numBatches': 1, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True}\n</code></pre></p>","boost":10},{"location":"api/delta_table/metadata/","title":"Metadata","text":"","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata","title":"deltalake.Metadata  <code>dataclass</code>","text":"<pre><code>Metadata(table: RawDeltaTable)\n</code></pre> <p>Create a Metadata instance.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.configuration","title":"configuration  <code>property</code>","text":"<pre><code>configuration: Dict[str, str]\n</code></pre> <p>Return the DeltaTable properties.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.created_time","title":"created_time  <code>property</code>","text":"<pre><code>created_time: int\n</code></pre> <p>Return The time when this metadata action is created, in milliseconds since the Unix epoch of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.description","title":"description  <code>property</code>","text":"<pre><code>description: str\n</code></pre> <p>Return the user-provided description of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.id","title":"id  <code>property</code>","text":"<pre><code>id: int\n</code></pre> <p>Return the unique identifier of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.name","title":"name  
<code>property</code>","text":"<pre><code>name: str\n</code></pre> <p>Return the user-provided identifier of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.partition_columns","title":"partition_columns  <code>property</code>","text":"<pre><code>partition_columns: List[str]\n</code></pre> <p>Return an array containing the names of the partitioned columns of the DeltaTable.</p>","boost":2},{"location":"how-delta-lake-works/architecture-of-delta-table/","title":"Architecture of a Delta Lake table","text":"<p>A Delta table consists of Parquet files that contain data and a transaction log that stores metadata about the transactions.</p> <p></p> <p>Let's create a Delta table, perform some operations, and inspect the files that are created.</p>"},{"location":"how-delta-lake-works/architecture-of-delta-table/#delta-lake-transaction-examples","title":"Delta Lake transaction examples","text":"<p>Start by creating a pandas DataFrame and writing it out to a Delta table.</p> <pre><code>import pandas as pd\nfrom deltalake import DeltaTable, write_deltalake\n\ndf = pd.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>Now inspect the files created in storage:</p> <pre><code>tmp/some-table\n\u251c\u2500\u2500 0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\n\u2514\u2500\u2500 _delta_log\n    \u2514\u2500\u2500 00000000000000000000.json\n</code></pre> <p>The Parquet file stores the data that was written.  The <code>_delta_log</code> directory stores metadata about the transactions.  
Let's inspect the <code>_delta_log/00000000000000000000.json</code> file.</p> <pre><code>{\n  \"protocol\": {\n    \"minReaderVersion\": 1,\n    \"minWriterVersion\": 1\n  }\n}\n{\n  \"metaData\": {\n    \"id\": \"b96ea1a2-1830-4da2-8827-5334cc6104ed\",\n    \"name\": null,\n    \"description\": null,\n    \"format\": {\n      \"provider\": \"parquet\",\n      \"options\": {}\n    },\n    \"schemaString\": \"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"num\\\",\\\"type\\\":\\\"long\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"letter\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\n    \"partitionColumns\": [],\n    \"createdTime\": 1701740315599,\n    \"configuration\": {}\n  }\n}\n{\n  \"add\": {\n    \"path\": \"0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\",\n    \"size\": 2208,\n    \"partitionValues\": {},\n    \"modificationTime\": 1701740315597,\n    \"dataChange\": true,\n    \"stats\": \"{\\\"numRecords\\\": 3, \\\"minValues\\\": {\\\"num\\\": 1, \\\"letter\\\": \\\"a\\\"}, \\\"maxValues\\\": {\\\"num\\\": 3, \\\"letter\\\": \\\"c\\\"}, \\\"nullCount\\\": {\\\"num\\\": 0, \\\"letter\\\": 0}}\"\n  }\n}\n{\n  \"commitInfo\": {\n    \"timestamp\": 1701740315602,\n    \"operation\": \"CREATE TABLE\",\n    \"operationParameters\": {\n      \"location\": \"file:///Users/matthew.powers/Documents/code/delta/delta-examples/notebooks/python-deltalake/tmp/some-table\",\n      \"metadata\": 
\"{\\\"configuration\\\":{},\\\"created_time\\\":1701740315599,\\\"description\\\":null,\\\"format\\\":{\\\"options\\\":{},\\\"provider\\\":\\\"parquet\\\"},\\\"id\\\":\\\"b96ea1a2-1830-4da2-8827-5334cc6104ed\\\",\\\"name\\\":null,\\\"partition_columns\\\":[],\\\"schema\\\":{\\\"fields\\\":[{\\\"metadata\\\":{},\\\"name\\\":\\\"num\\\",\\\"nullable\\\":true,\\\"type\\\":\\\"long\\\"},{\\\"metadata\\\":{},\\\"name\\\":\\\"letter\\\",\\\"nullable\\\":true,\\\"type\\\":\\\"string\\\"}],\\\"type\\\":\\\"struct\\\"}}\",\n      \"protocol\": \"{\\\"minReaderVersion\\\":1,\\\"minWriterVersion\\\":1}\",\n      \"mode\": \"ErrorIfExists\"\n    },\n    \"clientVersion\": \"delta-rs.0.17.0\"\n  }\n}\n</code></pre> <p>The transaction log file contains the following information:</p> <ul> <li>the files added to the Delta table</li> <li>schema of the files</li> <li>column level metadata including the min/max value for each file</li> </ul> <p>Create another pandas DataFrame and append it to the Delta table to see how this transaction is recorded.</p> <pre><code>df = pd.DataFrame({\"num\": [8, 9], \"letter\": [\"dd\", \"ee\"]})\nwrite_deltalake(f\"{cwd}/tmp/delta-table\", df, mode=\"append\")\n</code></pre> <p>Here are the files in storage:</p> <pre><code>tmp/some-table\n\u251c\u2500\u2500 0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\n\u251c\u2500\u2500 1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\n\u2514\u2500\u2500 _delta_log\n    \u251c\u2500\u2500 00000000000000000000.json\n    \u2514\u2500\u2500 00000000000000000001.json\n</code></pre> <p>Here are the contents of the <code>_delta_log/00000000000000000001.json</code> file:</p> <pre><code>{\n  \"add\": {\n    \"path\": \"1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\",\n    \"size\": 2204,\n    \"partitionValues\": {},\n    \"modificationTime\": 1701740386169,\n    \"dataChange\": true,\n    \"stats\": \"{\\\"numRecords\\\": 2, \\\"minValues\\\": {\\\"num\\\": 8, \\\"letter\\\": \\\"dd\\\"}, \\\"maxValues\\\": 
{\\\"num\\\": 9, \\\"letter\\\": \\\"ee\\\"}, \\\"nullCount\\\": {\\\"num\\\": 0, \\\"letter\\\": 0}}\"\n  }\n}\n{\n  \"commitInfo\": {\n    \"timestamp\": 1701740386169,\n    \"operation\": \"WRITE\",\n    \"operationParameters\": {\n      \"partitionBy\": \"[]\",\n      \"mode\": \"Append\"\n    },\n    \"clientVersion\": \"delta-rs.0.17.0\"\n  }\n}\n</code></pre> <p>The transaction log records that the second file has been persisted in the Delta table.</p> <p>Now create a third pandas DataFrame and overwrite the Delta table with the new data.</p> <pre><code>df = pd.DataFrame({\"num\": [11, 22], \"letter\": [\"aa\", \"bb\"]})\nwrite_deltalake(f\"{cwd}/tmp/delta-table\", df, mode=\"overwrite\")\n</code></pre> <p>Here are the files in storage:</p> <pre><code>tmp/some-table\n\u251c\u2500\u2500 0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\n\u251c\u2500\u2500 1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\n\u251c\u2500\u2500 2-95ef2108-480c-4b89-96f0-ff9185dab9ad-0.parquet\n\u2514\u2500\u2500 _delta_log\n    \u251c\u2500\u2500 00000000000000000000.json\n    \u251c\u2500\u2500 00000000000000000001.json\n    \u2514\u2500\u2500 00000000000000000002.json\n</code></pre> <p>Here are the contents of the <code>_delta_log/00000000000000000002.json</code> file:</p> <pre><code>{\n  \"add\": {\n    \"path\": \"2-95ef2108-480c-4b89-96f0-ff9185dab9ad-0.parquet\",\n    \"size\": 2204,\n    \"partitionValues\": {},\n    \"modificationTime\": 1701740465102,\n    \"dataChange\": true,\n    \"stats\": \"{\\\"numRecords\\\": 2, \\\"minValues\\\": {\\\"num\\\": 11, \\\"letter\\\": \\\"aa\\\"}, \\\"maxValues\\\": {\\\"num\\\": 22, \\\"letter\\\": \\\"bb\\\"}, \\\"nullCount\\\": {\\\"num\\\": 0, \\\"letter\\\": 0}}\"\n  }\n}\n{\n  \"remove\": {\n    \"path\": \"0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\",\n    \"deletionTimestamp\": 1701740465102,\n    \"dataChange\": true,\n    \"extendedFileMetadata\": false,\n    \"partitionValues\": {},\n    \"size\": 2208\n  }\n}\n{\n  \"remove\": {\n    
\"path\": \"1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\",\n    \"deletionTimestamp\": 1701740465102,\n    \"dataChange\": true,\n    \"extendedFileMetadata\": false,\n    \"partitionValues\": {},\n    \"size\": 2204\n  }\n}\n{\n  \"commitInfo\": {\n    \"timestamp\": 1701740465102,\n    \"operation\": \"WRITE\",\n    \"operationParameters\": {\n      \"mode\": \"Overwrite\",\n      \"partitionBy\": \"[]\"\n    },\n    \"clientVersion\": \"delta-rs.0.17.0\"\n  }\n}\n</code></pre> <p>This transaction adds a data file and marks the two existing data files for removal.  Marking a file for removal in the transaction log is known as \"tombstoning the file\" or a \"logical delete\".  This is different from a \"physical delete\" which actually removes the data file from storage.</p>"},{"location":"how-delta-lake-works/architecture-of-delta-table/#how-delta-table-operations-differ-from-data-lakes","title":"How Delta table operations differ from data lakes","text":"<p>Data lakes consist of data files persisted in storage.  They don't have a transaction log that retains metadata about the transactions.</p> <p>Data lakes perform transactions differently than Delta tables.</p> <p>When you perform an overwrite transaction with a Delta table, you logically delete the existing data without physically removing it.</p> <p>Data lakes don't support logical deletes, so you have to physically delete the data from storage.</p> <p>Logical data operations are safer because they can be rolled back if they don't complete successfully.  
Physically removing data from storage can be dangerous, especially if it's before a transaction is complete.</p> <p>We're now ready to look into Delta Lake ACID transactions in more detail.</p>"},{"location":"integrations/delta-lake-arrow/","title":"Delta Lake Arrow Integrations","text":"<p>Delta Lake tables can be exposed as Arrow tables and Arrow datasets, which allows for interoperability with a variety of query engines.</p> <p>This page shows you how to convert Delta tables to Arrow data structures and teaches you the difference between Arrow tables and Arrow datasets.  Tables are \"eager\" and datasets are \"lazy\", which has important performance implications, keep reading to learn more!</p>"},{"location":"integrations/delta-lake-arrow/#delta-lake-to-arrow-dataset","title":"Delta Lake to Arrow Dataset","text":"<p>Delta tables can easily be exposed as Arrow datasets.  This makes it easy for any query engine that can read Arrow datasets to read a Delta table.</p> <p>Let's take a look at the h2o groupby dataset that contains 9 columns of data.  
Here are three representative rows of data:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>Here's how to expose the Delta table as a PyArrow dataset and run a query with DuckDB:</p> <pre><code>import duckdb\nfrom deltalake import DeltaTable\n\ntable = DeltaTable(\"delta/G1_1e9_1e2_0_0\")\ndataset = table.to_pyarrow_dataset()\nquack = duckdb.arrow(dataset)\nquack.filter(\"id1 = 'id016' and v2 &gt; 10\")\n</code></pre> <p>Here's the result:</p> <pre><code>\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502   id1   \u2502   id2   \u2502     id3      \u2502  id4  \u2502  id5  \u2502   id6   \u2502  v1   \u2502  v2   \u2502    v3     \u2502\n\u2502 varchar \u2502 varchar \u2502   varchar    \u2502 int32 \u2502 int32 \u2502  int32  \u2502 int32 \u2502 int32 \u2502  double   
\u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 id016   \u2502 id054   \u2502 id0002309114 \u2502    62 \u2502    95 \u2502 7180859 \u2502     4 \u2502    13 \u2502  7.750173 \u2502\n\u2502 id016   \u2502 id044   \u2502 id0003968533 \u2502    63 \u2502    98 \u2502 2356363 \u2502     4 \u2502    14 \u2502  3.942417 \u2502\n\u2502 id016   \u2502 id034   \u2502 id0001082839 \u2502    58 \u2502    73 \u2502 8039808 \u2502     5 \u2502    12 \u2502 76.820135 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 ? 
rows (&gt;9999 rows, 3 shown)                                                 9 columns \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>Arrow datasets allow for the predicates to get pushed down to the query engine, so the query is executed quickly.</p>"},{"location":"integrations/delta-lake-arrow/#delta-lake-to-arrow-table","title":"Delta Lake to Arrow Table","text":"<p>You can also run the same query with DuckDB on an Arrow table:</p> <pre><code>quack = duckdb.arrow(table.to_pyarrow_table())\nquack.filter(\"id1 = 'id016' and v2 &gt; 10\")\n</code></pre> <p>This returns the same result, but it runs slower.</p>"},{"location":"integrations/delta-lake-arrow/#difference-between-arrow-dataset-and-arrow-table","title":"Difference between Arrow Dataset and Arrow Table","text":"<p>Arrow Datasets are lazy and allow for full predicate pushdown unlike Arrow tables which are eagerly loaded into memory.</p> <p>The previous DuckDB queries were run on a 1 billion row dataset that's roughly 50 GB when stored as an uncompressed CSV file.  
Here are the runtimes when the data is stored in a Delta table and the queries are executed on a 2021 Macbook M1 with 64 GB of RAM:</p> <ul> <li>Arrow table: 17.1 seconds</li> <li>Arrow dataset: 0.01 seconds</li> </ul> <p>The query runs much faster on an Arrow dataset because the predicates can be pushed down to the query engine and lots of data can be skipped.</p> <p>Arrow tables are eagerly materialized in memory and don't allow for the same amount of data skipping.</p>"},{"location":"integrations/delta-lake-arrow/#multiple-query-engines-can-query-arrow-datasets","title":"Multiple query engines can query Arrow Datasets","text":"<p>Other query engines like DataFusion can also query Arrow datasets, see the following example:</p> <pre><code>from datafusion import SessionContext\n\nctx = SessionContext()\nctx.register_dataset(\"my_dataset\", table.to_pyarrow_dataset())\nctx.sql(\"select * from my_dataset where v2 &gt; 5\")\n</code></pre> <p>Here's the result:</p> <pre><code>+-------+-------+--------------+-----+-----+--------+----+----+-----------+\n| id1   | id2   | id3          | id4 | id5 | id6    | v1 | v2 | v3        |\n+-------+-------+--------------+-----+-----+--------+----+----+-----------+\n| id082 | id049 | id0000022715 | 97  | 55  | 756924 | 2  | 11 | 74.161136 |\n| id053 | id052 | id0000113549 | 19  | 56  | 139048 | 1  | 10 | 95.178444 |\n| id090 | id043 | id0000637409 | 94  | 50  | 12448  | 3  | 12 | 60.21896  |\n+-------+-------+--------------+-----+-----+--------+----+----+-----------+\n</code></pre> <p>Any query engine that's capable of reading an Arrow table/dataset can read a Delta table.</p>"},{"location":"integrations/delta-lake-arrow/#conclusion","title":"Conclusion","text":"<p>Delta tables can easily be exposed as Arrow tables/datasets.</p> <p>Therefore any query engine that can read an Arrow table/dataset can also read a Delta table.</p> <p>Arrow datasets allow for more predicates to be pushed down to the query engine, so they can perform 
better than Arrow tables.</p>"},{"location":"integrations/delta-lake-datafusion/","title":"Using Delta Lake with DataFusion","text":"<p>This page explains how to use Delta Lake with DataFusion.</p> <p>Delta Lake offers DataFusion users better performance and more features compared to other formats like CSV or Parquet.</p> <p>Delta Lake works well with the DataFusion Rust API and the DataFusion Python API.  It's a great option for all DataFusion users.</p> <p>Delta Lake also depends on DataFusion to implement SQL-related functionality under the hood.  We will also discuss this dependency at the end of this guide in case you're interested in learning more about the symbiotic relationship between the two libraries.</p>"},{"location":"integrations/delta-lake-datafusion/#delta-lake-performance-benefits-for-datafusion-users","title":"Delta Lake performance benefits for DataFusion users","text":"<p>Let's run some DataFusion queries on a Parquet file and a Delta table with the same data to learn more about the performance benefits of Delta Lake.</p> <p>Suppose you have the following dataset with 1 billion rows and 9 columns.  
Here are the first three rows of data:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>Here's how to register a Delta Lake table as a PyArrow dataset:</p> <pre><code>from datafusion import SessionContext\nfrom deltalake import DeltaTable\n\nctx = SessionContext()\ntable = DeltaTable(\"G1_1e9_1e2_0_0\")\nctx.register_dataset(\"my_delta_table\", table.to_pyarrow_dataset())\n</code></pre> <p>Now query the table:</p> <pre><code>ctx.sql(\"select id1, sum(v1) as v1 from my_delta_table where id1='id096' group by id1\")\n</code></pre> <p>That query takes 2.8 seconds to execute.</p> <p>Let's register the same dataset as a Parquet table, run the same query, and compare the runtime difference.</p> <p>Register the Parquet table and run the query:</p> <pre><code>path = \"G1_1e9_1e2_0_0.parquet\"\nctx.register_parquet(\"my_parquet_table\", path)\nctx.sql(\"select id1, sum(v1) as v1 from my_parquet_table where id1='id096' group by id1\")\n</code></pre> <p>This query takes 5.3 seconds to run.</p> <p>Parquet stores data in row groups and DataFusion can intelligently skip row groups that don't contain relevant data, so the query is faster than a file format like CSV which doesn't support row group skipping.</p> <p>Delta Lake stores file-level metadata information in the transaction log, so it can skip entire files when queries are executed.  Delta Lake can skip entire files and then skip row groups within the individual files.  
This makes Delta Lake even faster than Parquet files, especially for larger datasets spread across many files.</p>"},{"location":"integrations/delta-lake-datafusion/#delta-lake-features-for-datafusion-users","title":"Delta Lake features for DataFusion users","text":"<p>Delta Lake also provides other features that are useful for DataFusion users like ACID transactions, concurrency protection, time travel, versioned data, and more.</p>"},{"location":"integrations/delta-lake-datafusion/#why-delta-lake-depends-on-datafusion","title":"Why Delta Lake depends on DataFusion","text":"<p>Delta Lake depends on DataFusion to provide some end-user features.</p> <p>DataFusion is useful in providing SQL-related Delta Lake features. Some examples:</p> <ul> <li>Update and merge are written in terms of SQL expressions.</li> <li>Invariants and constraints are written in terms of SQL expressions.</li> </ul> <p>Anytime we have to evaluate SQL, we need some sort of SQL engine.  We use DataFusion for that.</p>"},{"location":"integrations/delta-lake-datafusion/#conclusion","title":"Conclusion","text":"<p>Delta Lake is a great file format for DataFusion users.</p> <p>Delta Lake also uses DataFusion to provide some end-user features.</p> <p>DataFusion and Delta Lake have a wonderful symbiotic relationship and play very nicely with each other.</p> <p>See this guide for more information on Delta Lake and PyArrow and why PyArrow Datasets are often a better option than PyArrow tables.</p>"},{"location":"integrations/delta-lake-pandas/","title":"Using Delta Lake with pandas","text":"<p>Delta Lake is a great storage system for pandas analyses.  
This page shows how easy it is to use Delta Lake with pandas, the unique features Delta Lake offers pandas users, and how Delta Lake can make your pandas analyses run faster.</p> <p>Delta Lake is very easy to install for pandas analyses, just run <code>pip install deltalake</code>.</p> <p>Delta Lake allows for performance optimizations, so pandas queries can run much faster than the same queries run on data stored in CSV or Parquet.  See the following chart for the query runtime for a Delta table compared with CSV/Parquet.</p> <p></p> <p>Z Ordered Delta tables run this query much faster than when the data is stored in Parquet or CSV.  Let's dive in deeper and see how Delta Lake makes pandas faster.</p>"},{"location":"integrations/delta-lake-pandas/#delta-lake-makes-pandas-queries-run-faster","title":"Delta Lake makes pandas queries run faster","text":"<p>There are a few reasons Delta Lake can make pandas queries run faster:</p> <ol> <li>column pruning: only grabbing the columns relevant for a query</li> <li>file skipping: only reading files with data for the query</li> <li>row group skipping: only reading row groups with data for the query</li> <li>Z ordering data: colocating similar data in the same files, so file skipping is more effective</li> </ol> <p>Reading less data (fewer columns and/or fewer rows) is how Delta Lake makes pandas queries run faster.</p> <p>Parquet allows for column pruning and row group skipping, but doesn't support file-level skipping or Z Ordering.  
CSV doesn't support any of these performance optimizations.</p> <p>Let's take a look at a sample dataset and run a query to see the performance enhancements offered by Delta Lake.</p> <p>Suppose you have a 1 billion row dataset with 9 columns.  Here are the first three rows of the dataset:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>The dataset is roughly 50 GB when stored as an uncompressed CSV file.  Let's run some queries on a 2021 Macbook M1 with 64 GB of RAM.</p> <p>Start by running the query on an uncompressed CSV file:</p> <pre><code>(\n    pd.read_csv(f\"{Path.home()}/data/G1_1e9_1e2_0_0.csv\", usecols=[\"id1\", \"id2\", \"v1\"])\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query takes 234 seconds to execute.  It runs out of memory if the <code>usecols</code> parameter is not set.</p> <p>Now let's convert the CSV dataset to Parquet and run the same query on the data stored in a Parquet file.</p> <pre><code>(\n    pd.read_parquet(\n        f\"{Path.home()}/data/G1_1e9_1e2_0_0.parquet\", columns=[\"id1\", \"id2\", \"v1\"]\n    )\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query takes 118 seconds to execute.</p> <p>Parquet stores data in row groups and allows for skipping when the <code>filters</code> predicates are set.  
Run the Parquet query again with row group skipping enabled:</p> <pre><code>(\n    pd.read_parquet(\n        f\"{Path.home()}/data/G1_1e9_1e2_0_0.parquet\",\n        columns=[\"id1\", \"id2\", \"v1\"],\n        filters=[(\"id1\", \"==\", \"id016\")],\n    )\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query runs in 19 seconds.  Lots of row groups can be skipped for this particular query.</p> <p>Now let's run the same query on a Delta table to see the out-of-the-box performance:</p> <pre><code>(\n    DeltaTable(f\"{Path.home()}/data/deltalake_baseline_G1_1e9_1e2_0_0\", version=0)\n    .to_pandas(filters=[(\"id1\", \"==\", \"id016\")], columns=[\"id1\", \"id2\", \"v1\"])\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query runs in 8 seconds, which is a significant performance enhancement.</p> <p>Now let's Z Order the Delta table by <code>id1</code>, which will make the data skipping even better.  
Run the query again on the Z Ordered Delta table:</p> <pre><code>(\n    DeltaTable(f\"{Path.home()}/data/deltalake_baseline_G1_1e9_1e2_0_0\", version=1)\n    .to_pandas(filters=[(\"id1\", \"==\", \"id016\")], columns=[\"id1\", \"id2\", \"v1\"])\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>The query now executes in 2.4 seconds.</p> <p>Delta tables can make certain pandas queries run much faster.</p>"},{"location":"integrations/delta-lake-pandas/#delta-lake-lets-pandas-users-time-travel","title":"Delta Lake lets pandas users time travel","text":"<p>Start by creating a Delta table:</p> <pre><code>from deltalake import write_deltalake, DeltaTable\n\ndf = pd.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>Here are the contents of the Delta table (version 0 of the Delta table):</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre> <p>Now append two rows to the Delta table:</p> <pre><code>df = pd.DataFrame({\"num\": [8, 9], \"letter\": [\"dd\", \"ee\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"append\")\n</code></pre> <p>Here are the contents after the append operation (version 1 of the Delta table):</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre> <p>Now perform an overwrite transaction:</p> <pre><code>df = pd.DataFrame({\"num\": [11, 22], \"letter\": [\"aa\", \"bb\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"overwrite\")\n</code></pre> <p>Here are the contents after the overwrite operation (version 2 of the Delta table):</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|    11 | aa      
 |\n|    22 | bb       |\n+-------+----------+\n</code></pre> <p>Read in the Delta table and it will grab the latest version by default:</p> <pre><code>DeltaTable(\"tmp/some-table\").to_pandas()\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|    11 | aa       |\n|    22 | bb       |\n+-------+----------+\n</code></pre> <p>You can easily time travel back to version 0 of the Delta table:</p> <pre><code>DeltaTable(\"tmp/some-table\", version=0).to_pandas()\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre> <p>You can also time travel to version 1 of the Delta table:</p> <pre><code>DeltaTable(\"tmp/some-table\", version=1).to_pandas()\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre> <p>Time travel is a powerful feature that pandas users cannot access with CSV or Parquet.</p>"},{"location":"integrations/delta-lake-pandas/#schema-enforcement","title":"Schema enforcement","text":"<p>Delta tables only allow you to append DataFrames with a matching schema by default.  
Suppose you have a DataFrame with <code>num</code> and <code>animal</code> columns, which is different from the Delta table that has columns with <code>num</code> and <code>letter</code> columns.</p> <p>Try to append this DataFrame with a mismatched schema to the existing table:</p> <pre><code>df = pd.DataFrame({\"num\": [5, 6], \"animal\": [\"cat\", \"dog\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>This transaction will be rejected and will return the following error message:</p> <pre><code>ValueError: Schema of data does not match table schema\nData schema:\nnum: int64\nanimal: string\n-- schema metadata --\npandas: '{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"' + 474\nTable Schema:\nnum: int64\nletter: string\n</code></pre> <p>Schema enforcement protects your table from getting corrupted by appending data with mismatched schema.  Parquet and CSV don't offer schema enforcement for pandas users.</p>"},{"location":"integrations/delta-lake-pandas/#overwriting-schema-of-table","title":"Overwriting schema of table","text":"<p>You can overwrite the table contents and schema by setting the <code>overwrite_schema</code> option.  Here's how to overwrite the table contents:</p> <pre><code>write_deltalake(\"tmp/some-table\", df, mode=\"overwrite\", overwrite_schema=True)\n</code></pre> <p>Here are the contents of the table after the values and schema have been overwritten:</p> <pre><code>+-------+----------+\n|   num | animal   |\n|-------+----------|\n|     5 | cat      |\n|     6 | dog      |\n+-------+----------+\n</code></pre>"},{"location":"integrations/delta-lake-pandas/#in-memory-vs-in-storage-data-changes","title":"In-memory vs. 
in-storage data changes","text":"<p>It's important to distinguish between data stored in-memory and data stored on disk when understanding the functionality offered by Delta Lake.</p> <p>pandas loads data from storage (CSV, Parquet, or Delta Lake) into in-memory DataFrames.</p> <p>pandas makes it easy to modify the data in memory, say update a column value.  It's not easy to update a column value in storage systems like CSV or Parquet using pandas.</p> <p>Delta Lake makes it easy for pandas users to update data in storage.</p>"},{"location":"integrations/delta-lake-pandas/#why-delta-lake-allows-for-faster-queries","title":"Why Delta Lake allows for faster queries","text":"<p>Delta tables store data in many files and metadata about the files in the transaction log.  Delta Lake allows for certain queries to skip entire files, which makes pandas queries run much faster.</p>"},{"location":"integrations/delta-lake-pandas/#more-resources","title":"More resources","text":"<p>See this talk on why Delta Lake is the best file format for pandas analyses to learn more: </p>"},{"location":"integrations/delta-lake-pandas/#conclusion","title":"Conclusion","text":"<p>Delta Lake provides many features that make it an excellent format for pandas analyses:</p> <ul> <li>performance optimizations make pandas queries run faster</li> <li>data management features make pandas analyses more reliable</li> <li>advanced features allow you to perform more complex pandas analyses</li> </ul> <p>Python deltalake offers pandas users a better experience compared with CSV/Parquet.</p>"},{"location":"integrations/delta-lake-polars/","title":"Using Delta Lake with polars","text":"<p>This page explains why Delta Lake is a great storage system for Polars analyses.</p> <p>You will learn how to create Delta tables with Polars, how to query Delta tables with Polars, and the unique advantages Delta Lake offers the Polars community.</p> <p>Here are some amazing benefits that Delta Lake provides Polars 
users:</p> <ul> <li>time travel</li> <li>ACID transactions for reliable writes</li> <li>better performance with file skipping</li> <li>enhanced file skipping via Z Ordering</li> <li>ability to rollback mistakes</li> <li>and many, many more</li> </ul> <p>Let's start by showing how to use Polars with Delta Lake, explore how Delta Lake can make Polars queries run faster, and then look at all the cool features Delta Lake offers Polars users.</p>"},{"location":"integrations/delta-lake-polars/#creating-a-delta-lake-table-with-polars","title":"Creating a Delta Lake table with Polars","text":"<p>Create a Polars DataFrame and write it out to a Delta table:</p> <pre><code>import polars as pl\n\ndf = pl.DataFrame({\"x\": [1, 2, 3]})\ndf.write_delta(\"tmp/bear_delta_lake\")\n</code></pre> <p>Inspect the contents of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n+-----+\n</code></pre> <p>Now create another Polars DataFrame and append it to the existing Delta table:</p> <pre><code>df2 = pl.DataFrame({\"x\": [8, 9, 10]})\ndf2.write_delta(\"tmp/bear_delta_lake\", mode=\"append\")\n</code></pre> <p>Re-inspect the contents of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n| 8   |\n| 9   |\n| 10  |\n+-----+\n</code></pre> <p>Now overwrite the existing Delta table:</p> <pre><code>df3 = pl.DataFrame({\"x\": [55, 66, 77]})\ndf3.write_delta(\"tmp/bear_delta_lake\", mode=\"overwrite\")\n</code></pre> <p>Inspect the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 55  |\n| 66  |\n| 77  |\n+-----+\n</code></pre> <p>The Delta table now has three versions, as shown in the following diagram:</p> <p></p>"},{"location":"integrations/delta-lake-polars/#time-travel-with-delta-lake-for-polars","title":"Time travel 
with Delta Lake for Polars","text":"<p>Time travel back to version 0 of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\", version=0))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n+-----+\n</code></pre> <p>Time travel back to version 1 of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\", version=1))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n| 9   |\n| 8   |\n| 10  |\n+-----+\n</code></pre> <p>Read the Delta table without specifying a version and see how it reads the latest version by default:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 55  |\n| 66  |\n| 77  |\n+-----+\n</code></pre> <p>Let's dive into how to read Delta tables with Polars in more detail and compare the query runtime performance on larger datasets.</p>"},{"location":"integrations/delta-lake-polars/#reading-a-delta-lake-table-with-polars","title":"Reading a Delta Lake table with Polars","text":"<p>Let's look at the h2o groupby dataset that has 1 billion rows and 9 columns.  Here are the first three rows of the dataset:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>This dataset is 50GB when stored in an uncompressed CSV file.  
Let's run some queries on this dataset when it's stored in different file formats with Polars.</p> <p>This section will show the runtime for a query when the data is stored in CSV, Parquet, and Delta Lake and explain why Delta tables are the fastest.</p> <p></p> <p>Start by running a query on an uncompressed CSV file with <code>read_csv</code>:</p> <pre><code>pl.read_csv(\"~/data/G1_1e9_1e2_0_0.csv\").filter(pl.col(\"id1\") &lt; \"id016\").group_by(\n    [\"id1\", \"id2\"]\n).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query errors out after running for several minutes.  The machine runs out of memory.  Let's try it again with <code>scan_csv</code>.</p> <pre><code>pl.scan_csv(\"~/data/G1_1e9_1e2_0_0.csv\").filter(pl.col(\"id1\") &lt; \"id016\").group_by(\n    [\"id1\", \"id2\"]\n).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 56.2 seconds.</p> <p>Now let's run the same query when the data is stored in a Parquet file:</p> <pre><code>pl.scan_parquet(\"~/data/G1_1e9_1e2_0_0.parquet\").filter(\n    pl.col(\"id1\") &lt; \"id016\"\n).group_by([\"id1\", \"id2\"]).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 8.3 seconds.  It's much faster because Polars is optimized to skip row groups in Parquet files that don't contain data that's relevant for the query.</p> <p>Then run the query on newly created Delta table:</p> <pre><code>pl.scan_delta(\"~/data/deltalake/G1_1e9_1e2_0_0\", version=1).filter(\n    pl.col(\"id1\") &lt; \"id016\"\n).group_by([\"id1\", \"id2\"]).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 7.2 seconds.  
Polars can run this query faster because it can inspect the Delta transaction log and skip entire files that don't contain relevant data before performing the ordinary Parquet row group skipping.</p> <p>Finally run the query on the Delta table after it has been Z Ordered by <code>id1</code>:</p> <pre><code>pl.scan_delta(\"~/data/deltalake/G1_1e9_1e2_0_0\", version=2).filter(\n    pl.col(\"id1\") &lt; \"id016\"\n).group_by([\"id1\", \"id2\"]).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 3.5 seconds.  The query on the Z Ordered Delta table is even faster because similar data has been co-located in the same files.  This allows for even greater data skipping.</p> <p>Polars can leverage file skipping to query Delta tables very quickly.</p>"},{"location":"integrations/delta-lake-polars/#why-polars-is-fast-with-delta-lake","title":"Why Polars is fast with Delta Lake","text":"<p>Delta tables consist of metadata in a transaction log and data stored in Parquet files.</p> <p>When Polars queries a Delta table, it starts by consulting the transaction log to understand the metadata of each file in the Delta table.  This allows Polars to quickly identify which files should be skipped by the query.</p> <p>CSV files don't contain any such metadata, so file skipping isn't an option.  Polars can skip Parquet files based on metadata, but it needs to open up each file and read the metadata, which is slower than grabbing the file-level metadata directly from the transaction log.</p> <p>Parquet doesn't allow users to easily Z Order the data and colocate similar data in the same row groups.  
The Z Order optimizations are only supported in Delta tables.</p> <p>Delta Lake offers Polars users unique performance optimizations.</p>"},{"location":"integrations/delta-lake-polars/#other-delta-lake-features-relevant-for-polars-users","title":"Other Delta Lake features relevant for Polars users","text":"<ul> <li>ACID transactions for reliable writes</li> <li>better performance with file skipping</li> <li>enhanced file skipping via Z Ordering</li> <li>ability to roll back mistakes</li> </ul>"},{"location":"integrations/delta-lake-polars/#conclusion","title":"Conclusion","text":"<p>This guide shows how Delta Lake is a great storage format for Polars analyses.</p> <p>Delta Lake is easy to use, fast, and full of features that are great for Polars users.</p>"},{"location":"usage/appending-overwriting-delta-lake-table/","title":"Appending to and overwriting a Delta Lake table","text":"<p>This section explains how to append to an existing Delta table and how to overwrite a Delta table.</p>"},{"location":"usage/appending-overwriting-delta-lake-table/#delta-lake-append-transactions","title":"Delta Lake append transactions","text":"<p>Suppose you have a Delta table with the following contents:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre> <p>Append two additional rows of data to the table:</p> <pre><code>from deltalake import write_deltalake, DeltaTable\n\ndf = pd.DataFrame({\"num\": [8, 9], \"letter\": [\"dd\", \"ee\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"append\")\n</code></pre> <p>Here are the updated contents of the Delta table:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre> <p>Now let's see how to perform an overwrite 
transaction.</p>"},{"location":"usage/appending-overwriting-delta-lake-table/#delta-lake-overwrite-transactions","title":"Delta Lake overwrite transactions","text":"<p>Now let's see how to overwrite the existing Delta table.</p> <pre><code>df = pd.DataFrame({\"num\": [11, 22], \"letter\": [\"aa\", \"bb\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"overwrite\")\n</code></pre> <p>Here are the contents of the Delta table after the overwrite operation:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|    11 | aa       |\n|    22 | bb       |\n+-------+----------+\n</code></pre> <p>Overwriting just performs a logical delete.  It doesn't physically remove the previous data from storage.  Time travel back to the previous version to confirm that the old version of the table is still accessible.</p> <pre><code>dt = DeltaTable(\"tmp/some-table\", version=1)\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre>"},{"location":"usage/constraints/","title":"Adding a Constraint to a table","text":"<p>Check constraints are a way to enforce that only data that meets the constraint is allowed to be added to the table.</p>"},{"location":"usage/constraints/#add-the-constraint","title":"Add the Constraint","text":"Python Rust <p> <code>DeltaTable</code> <pre><code>from deltalake import DeltaTable\n\ndt = DeltaTable(\"../rust/tests/data/simple_table\")\n\n# Check the schema before hand\nprint(dt.schema())\n# Add the constraint to the table.\ndt.alter.add_constraint({\"id_gt_0\": \"id &gt; 0\"})\n</code></pre></p> <p> <code>DeltaTable</code> <pre><code>let table = deltalake::open_table(\"../rust/tests/data/simple_table\").await?;\nlet ops = DeltaOps(table);\nops.with_constraint(\"id_gt_0\", \"id &gt; 0\").await?;\n</code></pre></p> <p>After you have added the constraint to the table 
attempting to append data to the table that violates the constraint will instead throw an error.</p>"},{"location":"usage/constraints/#verify-the-constraint-by-trying-to-add-some-data","title":"Verify the constraint by trying to add some data","text":"Python Rust <pre><code>from deltalake import write_deltalake\nimport pandas as pd\n\ndf = pd.DataFrame({\"id\": [-1]})\nwrite_deltalake(dt, df, mode=\"append\", engine=\"rust\")\n# _internal.DeltaProtocolError: Invariant violations: [\"Check or Invariant (id &gt; 0) violated by value in row: [-1]\"]\n</code></pre> <pre><code>let table = deltalake::open_table(\"../rust/tests/data/simple_table\").await?;\nlet schema = table.get_state().arrow_schema()?;\nlet invalid_values: Vec&lt;Arc&lt;dyn Array&gt;&gt; = vec![\n    Arc::new(Int32Array::from(vec![-10]))\n];\nlet batch = RecordBatch::try_new(schema, invalid_values)?;\ntable.write(vec![batch]).await?;\n</code></pre> <p>Note: ensure you use the <code>engine='rust'</code> parameter when writing to the table as this feature is not supported in the default pyarrow writer. 
</p>"},{"location":"usage/create-delta-lake-table/","title":"Creating a Delta Lake Table","text":"<p>This section explains how to create a Delta Lake table.</p> <p>You can easily write a DataFrame to a Delta table.</p> <pre><code>from deltalake import write_deltalake\nimport pandas as pd\n\ndf = pd.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>Here are the contents of the Delta table in storage:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre>"},{"location":"usage/deleting-rows-from-delta-lake-table/","title":"Deleting rows from a Delta Lake table","text":"<p>This section explains how to delete rows from a Delta Lake table.</p> <p>Suppose you have the following Delta table with four rows:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     4 | d        |\n+-------+----------+\n</code></pre> <p>Here's how to delete all the rows where the <code>num</code> is greater than 2:</p> <pre><code>dt = DeltaTable(\"tmp/my-table\")\ndt.delete(\"num &gt; 2\")\n</code></pre> <p>Here are the contents of the Delta table after the delete operation has been performed:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n+-------+----------+\n</code></pre>"},{"location":"usage/examining-table/","title":"Examining a Table","text":""},{"location":"usage/examining-table/#metadata","title":"Metadata","text":"<p>The delta log maintains basic metadata about a table, including:</p> <ul> <li>A unique <code>id</code></li> <li>A <code>name</code>, if provided</li> <li>A <code>description</code>, if provided</li> <li>The list of <code>partitionColumns</code>.</li> <li>The <code>created_time</code> of the table</li> 
<li>A map of table <code>configuration</code>. This includes fields such as     <code>delta.appendOnly</code>, which if <code>true</code> indicates the table is not meant     to have data deleted from it.</li> </ul> <p>Get metadata from a table with the DeltaTable.metadata() method:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.metadata()\nMetadata(id: 5fba94ed-9794-4965-ba6e-6ee3c0d22af9, name: None, description: None, partitionColumns: [], created_time: 1587968585495, configuration={})\n</code></pre>"},{"location":"usage/examining-table/#schema","title":"Schema","text":"<p>The schema for the table is also saved in the transaction log. It can either be retrieved in the Delta Lake form as Schema or as a PyArrow schema. The first allows you to introspect any column-level metadata stored in the schema, while the latter represents the schema the table will be loaded into.</p> <p>Use DeltaTable.schema to retrieve the delta lake schema:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.schema()\nSchema([Field(id, PrimitiveType(\"long\"), nullable=True)])\n</code></pre> <p>These schemas have a JSON representation that can be retrieved. To reconstruct from json, use DeltaTable.schema.to_json().</p> <pre><code>&gt;&gt;&gt; dt.schema().to_json()\n'{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}'\n</code></pre> <p>Use DeltaTable.schema.to_pyarrow() to retrieve the PyArrow schema:</p> <pre><code>&gt;&gt;&gt; dt.schema().to_pyarrow()\nid: int64\n</code></pre>"},{"location":"usage/examining-table/#history","title":"History","text":"<p>Depending on what system wrote the table, the delta table may have provenance information describing what operations were performed on the table, when, and by whom. 
This information is retained for 30 days by default, unless otherwise specified by the table configuration <code>delta.logRetentionDuration</code>.</p> <p>Note</p> <p>This information is not written by all writers and different writers may use different schemas to encode the actions. For Spark\\'s format, see: https://docs.delta.io/latest/delta-utility.html#history-schema</p> <p>To view the available history, use <code>DeltaTable.history</code>:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.history()\n[{'timestamp': 1587968626537, 'operation': 'DELETE', 'operationParameters': {'predicate': '[\"((`id` % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT))\"]'}, 'readVersion': 3, 'isBlindAppend': False},\n {'timestamp': 1587968614187, 'operation': 'UPDATE', 'operationParameters': {'predicate': '((id#697L % cast(2 as bigint)) = cast(0 as bigint))'}, 'readVersion': 2, 'isBlindAppend': False},\n {'timestamp': 1587968604143, 'operation': 'WRITE', 'operationParameters': {'mode': 'Overwrite', 'partitionBy': '[]'}, 'readVersion': 1, 'isBlindAppend': False},\n {'timestamp': 1587968596254, 'operation': 'MERGE', 'operationParameters': {'predicate': '(oldData.`id` = newData.`id`)'}, 'readVersion': 0, 'isBlindAppend': False},\n {'timestamp': 1587968586154, 'operation': 'WRITE', 'operationParameters': {'mode': 'ErrorIfExists', 'partitionBy': '[]'}, 'isBlindAppend': True}]\n</code></pre>"},{"location":"usage/examining-table/#current-add-actions","title":"Current Add Actions","text":"<p>The active state for a delta table is determined by the Add actions, which provide the list of files that are part of the table and metadata about them, such as creation time, size, and statistics. 
You can get a data frame of the add actions data using <code>DeltaTable.get_add_actions</code>:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.8.0\")\n&gt;&gt;&gt; dt.get_add_actions(flatten=True).to_pandas()\n                                                    path  size_bytes   modification_time  data_change  num_records  null_count.value  min.value  max.value\n0  part-00000-c9b90f86-73e6-46c8-93ba-ff6bfaf892a...         440 2021-03-06 15:16:07         True            2                 0          0          2\n1  part-00000-04ec9591-0b73-459e-8d18-ba5711d6cbe...         440 2021-03-06 15:16:16         True            2                 0          2          4\n</code></pre> <p>This works even with past versions of the table:</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.8.0\", version=0)\n&gt;&gt;&gt; dt.get_add_actions(flatten=True).to_pandas()\n                                                path  size_bytes   modification_time  data_change  num_records  null_count.value  min.value  max.value\n0  part-00000-c9b90f86-73e6-46c8-93ba-ff6bfaf892a...         440 2021-03-06 15:16:07         True            2                 0          0          2\n1  part-00001-911a94a2-43f6-4acb-8620-5e68c265498...         
445 2021-03-06 15:16:07         True            3                 0          2          4\n</code></pre>"},{"location":"usage/installation/","title":"Installation","text":"<p>The <code>deltalake</code> project can be installed via pip for Python or Cargo for Rust.</p>"},{"location":"usage/installation/#install-delta-lake-for-python","title":"Install Delta Lake for Python","text":"<p>With pip:</p> <pre><code>pip install deltalake\n</code></pre> <p>With Conda:</p> <pre><code>conda install -c conda-forge deltalake\n</code></pre>"},{"location":"usage/installation/#install-delta-lake-for-rust","title":"Install Delta Lake for Rust","text":"<p>With Cargo:</p> <pre><code>cargo add deltalake\n</code></pre>"},{"location":"usage/installation/#run-delta-lake-and-pandas-in-a-jupyter-notebook","title":"Run Delta Lake and pandas in a Jupyter Notebook","text":"<p>You can easily run Delta Lake and pandas in a Jupyter notebook.</p> <p>Create an environment file with the dependencies as follows:</p> <pre><code>name: deltalake-minimal\nchannels:\n  - conda-forge\n  - defaults\ndependencies:\n  - python=3.11\n  - ipykernel\n  - pandas\n  - polars\n  - jupyterlab\n  - pip\n  - pip:\n    - deltalake\n</code></pre> <p>Create a virtual environment with the dependencies:</p> <pre><code>conda env create -f deltalake-minimal.yml\n</code></pre> <p>Open the Jupyter notebook and run commands as follows:</p> <p></p>"},{"location":"usage/loading-table/","title":"Loading a Delta Table","text":"<p>To load the current version, use the constructor:</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.2.0\")\n</code></pre> <p>Depending on your storage backend, you could use the <code>storage_options</code> parameter to provide some configuration. 
Configuration is defined for specific backends - s3 options, azure options, gcs options.</p> <pre><code>&gt;&gt;&gt; storage_options = {\"AWS_ACCESS_KEY_ID\": \"THE_AWS_ACCESS_KEY_ID\", \"AWS_SECRET_ACCESS_KEY\":\"THE_AWS_SECRET_ACCESS_KEY\"}\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.2.0\", storage_options=storage_options)\n</code></pre> <p>The configuration can also be provided via the environment, and the basic service provider is derived from the URL being used. We try to support many of the well-known formats to identify basic service properties.</p> <p>S3:</p> <ul> <li>s3://\\&lt;bucket&gt;/\\&lt;path&gt;</li> <li>s3a://\\&lt;bucket&gt;/\\&lt;path&gt;</li> </ul> <p>Azure:</p> <ul> <li>az://\\&lt;container&gt;/\\&lt;path&gt;</li> <li>adl://\\&lt;container&gt;/\\&lt;path&gt;</li> <li>abfs://\\&lt;container&gt;/\\&lt;path&gt;</li> </ul> <p>GCS:</p> <ul> <li>gs://\\&lt;bucket&gt;/\\&lt;path&gt;</li> </ul> <p>Alternatively, if you have a data catalog you can load it by reference to a database and table name. 
Currently only AWS Glue is supported.</p> <p>For AWS Glue catalog, use AWS environment variables to authenticate.</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; from deltalake import DataCatalog\n&gt;&gt;&gt; database_name = \"simple_database\"\n&gt;&gt;&gt; table_name = \"simple_table\"\n&gt;&gt;&gt; data_catalog = DataCatalog.AWS\n&gt;&gt;&gt; dt = DeltaTable.from_data_catalog(data_catalog=data_catalog, database_name=database_name, table_name=table_name)\n&gt;&gt;&gt; dt.to_pyarrow_table().to_pydict()\n{'id': [5, 7, 9, 5, 6, 7, 8, 9]}\n</code></pre>"},{"location":"usage/loading-table/#custom-storage-backends","title":"Custom Storage Backends","text":"<p>While delta always needs its internal storage backend to work and be properly configured, in order to manage the delta log, it may sometime be advantageous - and is common practice in the arrow world - to customize the storage interface used for reading the bulk data.</p> <p><code>deltalake</code> will work with any storage compliant with <code>pyarrow.fs.FileSystem</code>, however the root of the filesystem has to be adjusted to point at the root of the Delta table. We can achieve this by wrapping the custom filesystem into a <code>pyarrow.fs.SubTreeFileSystem</code>.</p> <pre><code>import pyarrow.fs as fs\nfrom deltalake import DeltaTable\n\npath = \"&lt;path/to/table&gt;\"\nfilesystem = fs.SubTreeFileSystem(path, fs.LocalFileSystem())\n\ndt = DeltaTable(path)\nds = dt.to_pyarrow_dataset(filesystem=filesystem)\n</code></pre> <p>When using the pyarrow factory method for file systems, the normalized path is provided on creation. 
In case of S3 this would look something like:</p> <pre><code>import pyarrow.fs as fs\nfrom deltalake import DeltaTable\n\ntable_uri = \"s3://&lt;bucket&gt;/&lt;path&gt;\"\nraw_fs, normalized_path = fs.FileSystem.from_uri(table_uri)\nfilesystem = fs.SubTreeFileSystem(normalized_path, raw_fs)\n\ndt = DeltaTable(table_uri)\nds = dt.to_pyarrow_dataset(filesystem=filesystem)\n</code></pre>"},{"location":"usage/loading-table/#time-travel","title":"Time Travel","text":"<p>To load previous table states, you can provide the version number you wish to load:</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\", version=2)\n</code></pre> <p>Once you\\'ve loaded a table, you can also change versions using either a version number or datetime string:</p> <pre><code>&gt;&gt;&gt; dt.load_version(1)\n&gt;&gt;&gt; dt.load_with_datetime(\"2021-11-04 00:05:23.283+00:00\")\n</code></pre> <p>Warning</p> <p>Previous table versions may not exist if they have been vacuumed, in which case an exception will be thrown. See Vacuuming tables for more information.</p>"},{"location":"usage/managing-tables/","title":"Managing Delta Tables","text":""},{"location":"usage/managing-tables/#vacuuming-tables","title":"Vacuuming tables","text":"<p>Vacuuming a table will delete any files that have been marked for deletion. This may make some past versions of a table invalid, so this can break time travel. However, it will save storage space. Vacuum will retain files in a certain window, by default one week, so time travel will still work in shorter ranges.</p> <p>Delta tables usually don't delete old files automatically, so vacuuming regularly is considered good practice, unless the table is only appended to.</p> <p>Use <code>DeltaTable.vacuum</code> to perform the vacuum operation. Note that to prevent accidental deletion, the function performs a dry-run by default: it will only list the files to be deleted. 
Pass <code>dry_run=False</code> to actually delete files.</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.vacuum()\n['../rust/tests/data/simple_table/part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet',\n '../rust/tests/data/simple_table/part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet',\n '../rust/tests/data/simple_table/part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet',\n ...]\n&gt;&gt;&gt; dt.vacuum(dry_run=False) # Don't run this unless you are sure!\n</code></pre>"},{"location":"usage/managing-tables/#optimizing-tables","title":"Optimizing tables","text":"<p>Optimizing tables is not currently supported.</p>"},{"location":"usage/overview/","title":"Usage","text":"<p>A DeltaTable represents the state of a delta table at a particular version. This includes which files are currently part of the table, the schema of the table, and other metadata such as creation time.</p>  Python Rust <p> <code>DeltaTable</code> <pre><code>from deltalake import DeltaTable\n\ndt = DeltaTable(\"../rust/tests/data/delta-0.2.0\")\nprint(f\"Version: {dt.version()}\")\nprint(f\"Files: {dt.files()}\")\n</code></pre></p> <p> <code>DeltaTable</code> <pre><code>let table = deltalake::open_table(\"../rust/tests/data/simple_table\").await.unwrap();\nprintln!(\"Version: {}\", table.version());\nprintln!(\"Files: {}\", table.get_files());\n</code></pre></p>"},{"location":"usage/querying-delta-tables/","title":"Querying Delta Tables","text":"<p>Delta tables can be queried in several ways. By loading as Arrow data or an Arrow dataset, they can be used by compatible engines such as Pandas and DuckDB. By passing on the list of files, they can be loaded into other engines such as Dask.</p> <p>Delta tables are often larger than can fit into memory on a single computer, so this module provides ways to read only the parts of the data you need. 
Partition filters allow you to skip reading files that are part of irrelevant partitions. Only loading the columns required also saves memory. Finally, some methods allow reading tables batch-by-batch, allowing you to process the whole table while only having a portion loaded at any given time.</p> <p>To load into Pandas or a PyArrow table use the <code>DeltaTable.to_pandas</code> and <code>DeltaTable.to_pyarrow_table</code> methods, respectively. Both of these support filtering partitions and selecting particular columns.</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.8.0-partitioned\")\n&gt;&gt;&gt; dt.schema().to_pyarrow()\nvalue: string\nyear: string\nmonth: string\nday: string\n&gt;&gt;&gt; dt.to_pandas(partitions=[(\"year\", \"=\", \"2021\")], columns=[\"value\"])\n      value\n0     6\n1     7\n2     5\n3     4\n&gt;&gt;&gt; dt.to_pyarrow_table(partitions=[(\"year\", \"=\", \"2021\")], columns=[\"value\"])\npyarrow.Table\nvalue: string\n</code></pre> <p>Converting to a PyArrow Dataset allows you to filter on columns other than partition columns and load the result as a stream of batches rather than a single table. Convert to a dataset using <code>DeltaTable.to_pyarrow_dataset</code>. 
Filters applied to datasets will use the partition values and file statistics from the Delta transaction log and push down any other filters to the scanning operation.</p> <pre><code>&gt;&gt;&gt; import pyarrow.dataset as ds\n&gt;&gt;&gt; dataset = dt.to_pyarrow_dataset()\n&gt;&gt;&gt; condition = (ds.field(\"year\") == \"2021\") &amp; (ds.field(\"value\") &gt; \"4\")\n&gt;&gt;&gt; dataset.to_table(filter=condition, columns=[\"value\"]).to_pandas()\n  value\n0     6\n1     7\n2     5\n&gt;&gt;&gt; batch_iter = dataset.to_batches(filter=condition, columns=[\"value\"], batch_size=2)\n&gt;&gt;&gt; for batch in batch_iter: print(batch.to_pandas())\n  value\n0     6\n1     7\n  value\n0     5\n</code></pre> <p>PyArrow datasets may also be passed to compatible query engines, such as DuckDB</p> <pre><code>&gt;&gt;&gt; import duckdb\n&gt;&gt;&gt; ex_data = duckdb.arrow(dataset)\n&gt;&gt;&gt; ex_data.filter(\"year = 2021 and value &gt; 4\").project(\"value\")\n---------------------\n-- Expression Tree --\n---------------------\nProjection [value]\n  Filter [year=2021 AND value&gt;4]\n    arrow_scan(140409099470144, 4828104688, 1000000)\n\n---------------------\n-- Result Columns  --\n---------------------\n- value (VARCHAR)\n\n---------------------\n-- Result Preview  --\n---------------------\nvalue\nVARCHAR\n[ Rows: 3]\n6\n7\n5\n</code></pre> <p>Finally, you can always pass the list of file paths to an engine. For example, you can pass them to <code>dask.dataframe.read_parquet</code>:</p> <pre><code>&gt;&gt;&gt; import dask.dataframe as dd\n&gt;&gt;&gt; df = dd.read_parquet(dt.file_uris())\n&gt;&gt;&gt; df\nDask DataFrame Structure:\n                value             year            month              day\nnpartitions=6\n               object  category[known]  category[known]  category[known]\n                  ...              ...              ...              ...\n...               ...              ...              ...              ...\n                  ...             
 ...              ...              ...\n                  ...              ...              ...              ...\nDask Name: read-parquet, 6 tasks\n&gt;&gt;&gt; df.compute()\n  value  year month day\n0     1  2020     1   1\n0     2  2020     2   3\n0     3  2020     2   5\n0     4  2021     4   5\n0     5  2021    12   4\n0     6  2021    12  20\n1     7  2021    12  20\n</code></pre>"},{"location":"usage/writing-delta-tables/","title":"Writing Delta Tables","text":"<p>For overwrites and appends, use <code>write_deltalake</code>. If the table does not already exist, it will be created. The <code>data</code> parameter will accept a Pandas DataFrame, a PyArrow Table, or an iterator of PyArrow Record Batches.</p> <pre><code>&gt;&gt;&gt; from deltalake import write_deltalake\n&gt;&gt;&gt; df = pd.DataFrame({'x': [1, 2, 3]})\n&gt;&gt;&gt; write_deltalake('path/to/table', df)\n</code></pre> <p>Note: <code>write_deltalake</code> accepts a Pandas DataFrame, but will convert it to a Arrow table before writing. See caveats in <code>pyarrow:python/pandas</code>.</p> <p>By default, writes create a new table and error if it already exists. This is controlled by the <code>mode</code> parameter, which mirrors the behavior of Spark's <code>pyspark.sql.DataFrameWriter.saveAsTable</code> DataFrame method. To overwrite pass in <code>mode='overwrite'</code> and to append pass in <code>mode='append'</code>:</p> <pre><code>&gt;&gt;&gt; write_deltalake('path/to/table', df, mode='overwrite')\n&gt;&gt;&gt; write_deltalake('path/to/table', df, mode='append')\n</code></pre> <p><code>write_deltalake</code> will raise <code>ValueError</code> if the schema of the data passed to it differs from the existing table's schema. 
If you wish to alter the schema as part of an overwrite pass in <code>overwrite_schema=True</code>.</p>"},{"location":"usage/writing-delta-tables/#overwriting-a-partition","title":"Overwriting a partition","text":"<p>You can overwrite a specific partition by using <code>mode=\"overwrite\"</code> together with <code>partition_filters</code>. This will remove all files within the matching partition and insert your data as new files. This can only be done on one partition at a time. All of the input data must belong to that partition or else the method will raise an error.</p> <pre><code>&gt;&gt;&gt; from deltalake import write_deltalake\n&gt;&gt;&gt; df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'a', 'b']})\n&gt;&gt;&gt; write_deltalake('path/to/table', df, partition_by=['y'])\n\n&gt;&gt;&gt; table = DeltaTable('path/to/table')\n&gt;&gt;&gt; df2 = pd.DataFrame({'x': [100], 'y': ['b']})\n&gt;&gt;&gt; write_deltalake(table, df2, partition_filters=[('y', '=', 'b')], mode=\"overwrite\")\n\n&gt;&gt;&gt; table.to_pandas()\n     x  y\n0    1  a\n1    2  a\n2  100  b\n</code></pre> <p>This method could also be used to insert a new partition if one doesn't already exist, making this operation idempotent.</p>"},{"location":"usage/optimize/delta-lake-z-order/","title":"Delta Lake Z Order","text":"<p>This section explains how to Z Order a Delta table.</p> <p>Z Ordering colocates similar data in the same files, which allows for better file skipping and faster queries.</p> <p>Suppose you have a table with <code>first_name</code>, <code>age</code>, and <code>country</code> columns.</p> <p>If you Z Order the data by the <code>country</code> column, then individuals from the same country will be stored in the same files.  
When you subsequently query the data for individuals from a given country, it will execute faster because more data can be skipped.</p> <p>Here's how to Z Order a Delta table:</p> <pre><code>dt = DeltaTable(\"tmp\")\ndt.optimize.z_order([country])\n</code></pre>"},{"location":"usage/optimize/small-file-compaction-with-optimize/","title":"Delta Lake small file compaction with optimize","text":"<p>This post shows you how to perform small file compaction using the <code>optimize</code> method.  This was added to the <code>DeltaTable</code> class in version 0.9.0.  This command rearranges the small files into larger files which will reduce the number of files and speed up queries.</p> <p>This is very helpful for workloads that append frequently. For example, if you have a table that is appended to every 10 minutes, after a year you will have 52,560 files in the table. If the table is partitioned by another dimension, you will have 52,560 files per partition; with just 100 unique values that's millions of files. By running <code>optimize</code> periodically, you can reduce the number of files in the table to a more manageable number.</p> <p>Typically, you will run optimize less frequently than you append data. If possible, you might run optimize once you know you have finished writing to a particular partition. For example, on a table partitioned by date, you might append data every 10 minutes, but only run optimize once a day at the end of the day. This will ensure you don't need to compact the same data twice.</p> <p>This section will also teach you about how to use <code>vacuum</code> to physically remove files from storage that are no longer needed.  You\u2019ll often want vacuum after running optimize to remove the small files from storage once they\u2019ve been compacted into larger files.</p> <p>Let\u2019s start with an example to explain these key concepts.  
All the code covered in this post is stored in this notebook in case you\u2019d like to follow along.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#create-a-delta-table-with-small-files","title":"Create a Delta table with small files","text":"<p>Let\u2019s start by creating a Delta table with a lot of small files so we can demonstrate the usefulness of the <code>optimize</code> command.</p> <p>Start by writing a function that generates one thousand rows of random data given a timestamp.</p> <pre><code>def record_observations(date: datetime) -&gt; pa.Table:\n    \"\"\"Pulls data for a certain datetime\"\"\"\n    nrows = 1000\n    return pa.table(\n        {\n            \"date\": pa.array([date.date()] * nrows),\n            \"timestamp\": pa.array([date] * nrows),\n            \"value\": pc.random(nrows),\n        }\n    )\n</code></pre> <p>Let\u2019s run this function and observe the output:</p> <pre><code>record_observations(datetime(2021, 1, 1, 12)).to_pandas()\n\n    date                timestamp   value\n0   2021-01-01  2021-01-01 12:00:00 0.3186397383362023\n1   2021-01-01  2021-01-01 12:00:00 0.04253766974259088\n2   2021-01-01  2021-01-01 12:00:00 0.9355682965171573\n\u2026\n999 2021-01-01  2021-01-01 12:00:00 0.23207037062879843\n</code></pre> <p>Let\u2019s write 100 hours worth of data to the Delta table.</p> <pre><code># Every hour starting at midnight on 2021-01-01\nhours_iter = (datetime(2021, 1, 1) + timedelta(hours=i) for i in itertools.count())\n\n# Write 100 hours worth of data\nfor timestamp in itertools.islice(hours_iter, 100):\n    write_deltalake(\n        \"observation_data\",\n        record_observations(timestamp),\n        partition_by=[\"date\"],\n        mode=\"append\",\n    )\n</code></pre> <p>This data was appended to the Delta table in 100 separate transactions, so the table will contain 100 transaction log entries and 100 data files.  
You can see the number of files with the <code>files()</code> method.</p> <pre><code>dt = DeltaTable(\"observation_data\")\nlen(dt.files()) # 100\n</code></pre> <p>Here\u2019s how the files are persisted in storage.</p> <pre><code>observation_data\n\u251c\u2500\u2500 _delta_log\n\u2502   \u251c\u2500\u2500 00000000000000000000.json\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 00000000000000000099.json\n\u251c\u2500\u2500 date=2021-01-01\n\u2502   \u251c\u2500\u2500 0-cfe227c6-edd9-4369-a1b0-db4559a2e693-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u251c\u2500\u2500 23-a4ace29e-e73e-40a1-81d3-0f5dc13093de-0.parquet\n\u251c\u2500\u2500 date=2021-01-02\n\u2502   \u251c\u2500\u2500 24-9698b456-66eb-4075-8732-fe56d81edb60-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 47-d3fce527-e018-4c02-8acd-a649f6f523d2-0.parquet\n\u251c\u2500\u2500 date=2021-01-03\n\u2502   \u251c\u2500\u2500 48-fd90a7fa-5a14-42ed-9f59-9fe48d87899d-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 71-5f143ade-8ae2-4854-bdc5-61154175665f-0.parquet\n\u251c\u2500\u2500 date=2021-01-04\n\u2502   \u251c\u2500\u2500 72-477c10fe-dc09-4087-80f0-56006e4a7911-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 95-1c92cbce-8af4-4fe4-9c11-832245cf4d40-0.parquet\n\u2514\u2500\u2500 date=2021-01-05\n    \u251c\u2500\u2500 96-1b878ee5-25fd-431a-bc3e-6dcacc96b470-0.parquet\n    \u251c\u2500\u2500 \u2026\n    \u2514\u2500\u2500 99-9650ed63-c195-433d-a86b-9469088c14ba-0.parquet\n</code></pre> <p>Each of these Parquet files are tiny - they\u2019re only 10 KB.  
Let\u2019s see how to compact these tiny files into larger files, which is more efficient for data queries.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#compact-small-files-in-the-delta-table-with-optimize","title":"Compact small files in the Delta table with optimize","text":"<p>Let\u2019s run the optimize command to compact the existing small files into larger files:</p> <pre><code>dt = DeltaTable(\"observation_data\")\n\ndt.optimize()\n</code></pre> <p>Here\u2019s the output of the command:</p> <pre><code>{'numFilesAdded': 5,\n 'numFilesRemoved': 100,\n 'filesAdded': {'min': 39000,\n  'max': 238282,\n  'avg': 198425.6,\n  'totalFiles': 5,\n  'totalSize': 992128},\n 'filesRemoved': {'min': 10244,\n  'max': 10244,\n  'avg': 10244.0,\n  'totalFiles': 100,\n  'totalSize': 1024400},\n 'partitionsOptimized': 5,\n 'numBatches': 1,\n 'totalConsideredFiles': 100,\n 'totalFilesSkipped': 0,\n 'preserveInsertionOrder': True}\n</code></pre> <p>The optimize operation has added 5 new files and marked 100 existing files for removal (this is also known as \u201ctombstoning\u201d files).  It has compacted the 100 tiny files into 5 larger files.</p> <p>Let\u2019s append some more data to the Delta table and see how we can selectively run optimize on the new data that\u2019s added.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#handling-incremental-updates-with-optimize","title":"Handling incremental updates with optimize","text":"<p>Let\u2019s append another 24 hours of data to the Delta table:</p> <pre><code>for timestamp in itertools.islice(hours_iter, 24):\n    write_deltalake(\n        dt,\n        record_observations(timestamp),\n        partition_by=[\"date\"],\n        mode=\"append\",\n    )\n</code></pre> <p>We can use <code>get_add_actions()</code> to introspect the table state. We can see that <code>2021-01-06</code> has only a few hours of data so far, so we don't want to optimize that yet. 
But <code>2021-01-05</code> has all 24 hours of data, so it's ready to be optimized.</p> <pre><code>dt.get_add_actions(flatten=True).to_pandas()[\n    \"partition.date\"\n].value_counts().sort_index()\n\n2021-01-01     1\n2021-01-02     1\n2021-01-03     1\n2021-01-04     1\n2021-01-05    21\n2021-01-06     4\n</code></pre> <p>To optimize a single partition, you can pass in a <code>partition_filters</code> argument specifying which partitions to optimize.</p> <pre><code>dt.optimize(partition_filters=[(\"date\", \"=\", \"2021-01-05\")])\n\n{'numFilesAdded': 1,\n 'numFilesRemoved': 21,\n 'filesAdded': {'min': 238282,\n  'max': 238282,\n  'avg': 238282.0,\n  'totalFiles': 1,\n  'totalSize': 238282},\n 'filesRemoved': {'min': 10244,\n  'max': 39000,\n  'avg': 11613.333333333334,\n  'totalFiles': 21,\n  'totalSize': 243880},\n 'partitionsOptimized': 1,\n 'numBatches': 1,\n 'totalConsideredFiles': 21,\n 'totalFilesSkipped': 0,\n 'preserveInsertionOrder': True}\n</code></pre> <p>This optimize operation tombstones 21 small data files and adds one file with all the existing data properly condensed.  
Let\u2019s take a look at a portion of the <code>_delta_log/00000000000000000125.json</code> file, which is the transaction log entry that corresponds with this incremental optimize command.</p> <pre><code>{\n  \"remove\": {\n    \"path\": \"date=2021-01-05/part-00000-41178aab-2491-488f-943d-8f03867295ee-c000.snappy.parquet\",\n    \"deletionTimestamp\": 1683465499480,\n    \"dataChange\": false,\n    \"extendedFileMetadata\": null,\n    \"partitionValues\": {\n      \"date\": \"2021-01-05\"\n    },\n    \"size\": 39000,\n    \"tags\": null\n  }\n}\n\n{\n  \"remove\": {\n    \"path\": \"date=2021-01-05/101-79ae6fc9-c0cc-49ec-bb94-9aba879ac949-0.parquet\",\n    \"deletionTimestamp\": 1683465499481,\n    \"dataChange\": false,\n    \"extendedFileMetadata\": null,\n    \"partitionValues\": {\n      \"date\": \"2021-01-05\"\n    },\n    \"size\": 10244,\n    \"tags\": null\n  }\n}\n\n\u2026\n\n{\n  \"add\": {\n    \"path\": \"date=2021-01-05/part-00000-4b020a40-c836-4a11-851f-4691370c9f3a-c000.snappy.parquet\",\n    \"size\": 238282,\n    \"partitionValues\": {\n      \"date\": \"2021-01-05\"\n    },\n    \"modificationTime\": 1683465499493,\n    \"dataChange\": false,\n    \"stats\": \"{\\\"numRecords\\\":24000,\\\"minValues\\\":{\\\"value\\\":0.00005581532256615507,\\\"timestamp\\\":\\\"2021-01-05T00:00:00.000Z\\\"},\\\"maxValues\\\":{\\\"timestamp\\\":\\\"2021-01-05T23:00:00.000Z\\\",\\\"value\\\":0.9999911402868216},\\\"nullCount\\\":{\\\"timestamp\\\":0,\\\"value\\\":0}}\",\n    \"tags\": null\n  }\n}\n</code></pre> <p>The transaction log indicates that many files have been tombstoned and one file is added, as expected.</p> <p>The Delta Lake optimize command \u201cremoves\u201d data by marking the data files as removed in the transaction log.  The optimize command doesn\u2019t physically delete the Parquet file from storage.  
Optimize performs a \u201clogical remove\u201d not a \u201cphysical remove\u201d.</p> <p>Delta Lake uses logical operations so you can time travel back to earlier versions of your data.  You can vacuum your Delta table to physically remove Parquet files from storage if you don\u2019t need to time travel and don\u2019t want to pay to store the tombstoned files.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#vacuuming-after-optimizing","title":"Vacuuming after optimizing","text":"<p>The vacuum command deletes all files from storage that are marked for removal in the transaction log and older than the retention period which is 7 days by default.</p> <p>It\u2019s normally a good idea to have a retention period of at least 7 days.  For purposes of this example, we will set the retention period to zero, just so you can see how the files get removed from storage.  Adjusting the retention period in this manner isn\u2019t recommended for production use cases.</p> <p>Let\u2019s run the vacuum command:</p> <pre><code>dt.vacuum(retention_hours=0, enforce_retention_duration=False, dry_run=False)\n</code></pre> <p>The command returns a list of all the files that are removed from storage:</p> <pre><code>['date=2021-01-02/39-a98680f2-0e0e-4f26-a491-18b183f9eb05-0.parquet',\n 'date=2021-01-02/41-e96bc8bb-c571-484c-b534-e897424fb7da-0.parquet',\n \u2026\n 'date=2021-01-01/0-cfe227c6-edd9-4369-a1b0-db4559a2e693-0.parquet',\n 'date=2021-01-01/18-ded53418-172b-4e40-bf2e-7c8142e71bd1-0.parquet']\n</code></pre> <p>Let\u2019s look at the content of the Delta table now that all the really small files have been removed from storage:</p> <pre><code>observation_data\n\u251c\u2500\u2500 _delta_log\n\u2502   \u251c\u2500\u2500 00000000000000000000.json\n\u2502   \u251c\u2500\u2500 00000000000000000001.json\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u251c\u2500\u2500 00000000000000000124.json\n\u2502   \u2514\u2500\u2500 00000000000000000125.json\n\u251c\u2500\u2500 
date=2021-01-01\n\u2502   \u2514\u2500\u2500 part-00000-31e3df5a-8bbe-425c-b85d-77794f922837-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-02\n\u2502   \u2514\u2500\u2500 part-00000-8af07878-b179-49ce-a900-d58595ffb60a-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-03\n\u2502   \u2514\u2500\u2500 part-00000-5e980864-b32f-4686-a58d-a75fae455c1e-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-04\n\u2502   \u2514\u2500\u2500 part-00000-1e82d23b-084d-47e3-9790-d68289c39837-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-05\n\u2502   \u2514\u2500\u2500 part-00000-4b020a40-c836-4a11-851f-4691370c9f3a-c000.snappy.parquet\n\u2514\u2500\u2500 date=2021-01-06\n    \u251c\u2500\u2500 121-0ecb5d70-4a28-4cd4-b2d2-89ee2285eaaa-0.parquet\n    \u251c\u2500\u2500 122-6b2d2758-9154-4392-b287-fe371ee507ec-0.parquet\n    \u251c\u2500\u2500 123-551d318f-4968-441f-83fc-89f98cd15daf-0.parquet\n    \u2514\u2500\u2500 124-287309d3-662e-449d-b4da-2e67b7cc0557-0.parquet\n</code></pre> <p>All the partitions only contain a single file now, except for the <code>date=2021-01-06</code> partition that has not been compacted yet.</p> <p>An entire partition won\u2019t necessarily get compacted to a single data file when optimize is run.  Each partition has data files that are condensed to the target file size.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#what-causes-the-small-file-problem","title":"What causes the small file problem?","text":"<p>Delta tables can accumulate small files for a variety of reasons:</p> <ul> <li>User error: users can accidentally write files that are too small.  Users should sometimes repartition in memory before writing to disk to avoid appending files that are too small.</li> <li>Frequent appends: systems that append more often tend to append more smaller files.  
A pipeline that appends every minute will generally generate ten times as many small files compared to a system that appends every ten minutes.</li> <li>Appending to partitioned data lakes with high cardinality columns can also cause small files.  If you append every hour to a table that\u2019s partitioned on a column with 1,000 distinct values, then every append could create 1,000 new files.  Partitioning by date avoids this problem because the data isn\u2019t split up across partitions in this manner.  </li> </ul>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#conclusion","title":"Conclusion","text":"<p>This page showed you how to create a Delta table with many small files, compact the small files into larger files with optimize, and remove the tombstoned files from storage with vacuum.</p> <p>You also learned about how to incrementally optimize partitioned Delta tables, so you only compact newly added data.</p> <p>An excessive number of small files slows down Delta table queries, so periodic compaction is important.  Make sure to properly maintain your Delta tables, so performance does not degrade over time.</p>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"The deltalake package","text":"<p>This is the documentation for the native Rust/Python implementation of Delta Lake. It is based on the delta-rs Rust library and requires no Spark or JVM dependencies. For the PySpark implementation, see delta-spark instead.</p> <p>This module provides the capability to read, write, and manage Delta Lake tables with Python or Rust without Spark or Java. It uses Apache Arrow under the hood, so is compatible with other Arrow-native or integrated libraries such as pandas, DuckDB, and Polars.</p>"},{"location":"#important-terminology","title":"Important terminology","text":"<ul> <li>\"Rust deltalake\" refers to the Rust API of delta-rs (no Spark dependency)</li> <li>\"Python deltalake\" refers to the Python API of delta-rs (no Spark dependency)</li> <li>\"Delta Spark\" refers to the Scala implementation of the Delta Lake transaction log protocol.  This depends on Spark and Java.</li> </ul>"},{"location":"#why-implement-the-delta-lake-transaction-log-protocol-in-rust-and-scala","title":"Why implement the Delta Lake transaction log protocol in Rust and Scala?","text":"<p>Delta Spark depends on Java and Spark, which is fine for many use cases, but not all Delta Lake users want to depend on these libraries.  delta-rs allows using Delta Lake in Rust or other native projects when using a JVM is often not an option.</p> <p>Python deltalake lets you query Delta tables without depending on Java/Scala.</p> <p>Suppose you want to query a Delta table with pandas on your local machine.  
Python deltalake makes it easy to query the table with a simple <code>pip install</code> command - no need to install Java.</p>"},{"location":"#contributing","title":"Contributing","text":"<p>The Delta Lake community welcomes contributors from all developers, regardless of your experience or programming background.</p> <p>You can write Rust code, Python code, documentation, submit bugs, or give talks to the community.  We welcome all of these contributions.</p> <p>Feel free to join our Slack and message us in the #delta-rs channel any time!</p> <p>We value kind communication and building a productive, friendly environment for maximum collaboration and fun.</p>"},{"location":"#project-history","title":"Project history","text":"<p>Check out this video by Denny Lee &amp; QP Hou to learn about the genesis of the delta-rs project:</p>"},{"location":"why-use-delta-lake/","title":"Why use Delta Lake","text":"<p>This page explains why Delta Lake is a better storage format for most tabular data analyses than data lake alternatives.</p> <p>Delta Lake provides developer-friendly features, reliable transactions, and fast performance compared with alternatives like Parquet or CSV.</p>"},{"location":"why-use-delta-lake/#fast-performance","title":"Fast performance","text":"<p>Delta tables store data in Parquet files and persist file-level metadata in the transaction log.</p> <p>This offers two main performance advantages:</p> <ul> <li>File skipping based on metadata that\u2019s quickly accessible</li> <li>Easy identification of all file paths for the table, compared to file listing operations that can be slow, especially on cloud object stores</li> </ul> <p>Delta Lake stores min/max values for each column of each file in the table.  Certain queries can skip entire files based on the metadata.  File skipping can be a massive performance optimization.</p> <p>Delta Lake also makes it easy to rearrange data in the table, so more file skipping is possible.  
For example, the table can be partitioned or Z Ordered, so that similar data is colocated in the same files and data skipping is optimal for your query patterns.</p> <p>For data lakes, you need to run file listing operations to get the file paths before you can actually read the data.  Listing all the files in a data lake can take a long time, especially if there are a lot of files and they are stored in Hive-style partitions.</p> <p>Delta Lake stores all the file paths in the transaction log.  So you can quickly get the file paths directly from the log and then run your query.  Delta Lake also stores the file-level metadata in the transaction log which is quicker than opening all the files in the data lake and grabbing the metadata from the file footer.</p>"},{"location":"why-use-delta-lake/#developer-friendly-features","title":"Developer friendly features","text":"<p>Many basic data operations are hard in data lakes but quite easy with Delta Lake.  The only data operation that\u2019s easy in a data lake is appending data.  Delta Lake makes all data operations easy including the following:</p> <ul> <li>Appends</li> <li>Upserts</li> <li>Deletes</li> <li>Replace where</li> </ul> <p>Even deleting a few rows of data from a data lake is hard.  It\u2019s even harder if you want to run the operation in a performant manner.</p> <p>Delta Lake makes it easy to run common data operations and executes them performantly under the hood.</p> <p>Delta Lake also executes write operations as transactions, which makes data operations safer and prevents downtime.  Write operations will cause data lakes to be in an unstable state while the computation is running.  
For example, if you read a data lake while a delete operation is running, then you may get the wrong data.</p> <p>Let\u2019s explore the benefits of reliable transactions in more detail.</p>"},{"location":"why-use-delta-lake/#reliable-transactions","title":"Reliable transactions","text":"<p>Delta Lake supports transactions which means that write operations have the following characteristics:</p> <ul> <li>They either finish completely or don\u2019t run at all</li> <li>They are executed in a serial manner and don\u2019t conflict with other transactions</li> <li>They don\u2019t corrupt a table or violate table constraints</li> </ul> <p>Data lakes don\u2019t support transactions, so the write operations can cause the following errors:</p> <ul> <li>There is no schema enforcement, so you can append data to a data lake with a mismatching schema</li> <li>Reading the data lake often yields incorrect results while write transactions are performed</li> <li>Data lakes can be corrupted for invalid write operations or computations that error-out</li> <li>Concurrent transactions that conflict can cause data loss</li> </ul> <p>Production data systems should rely on storage systems like Delta Lake that support transactions.</p>"},{"location":"why-use-delta-lake/#interoperability","title":"Interoperability","text":"<p>Delta Lake tables are interoperable and can be read/written by multiple different query engines.</p> <p>For example, you can create a Delta table with Spark, append to it with pandas, and then read it with Polars.</p> <p></p> <p>Delta tables are powerful because they are interoperable with various query engines and computation runtimes.</p> <p>Suppose you have a Delta table that\u2019s updated with an AWS Lambda function every 5 minutes.  There is only a small amount of data collected every 5 minutes, so a lightweight runtime like AWS Lambda is sufficient.</p> <p>Further suppose that the overall table is quite large.  
So when you want to perform DML operations or query the whole table, your team uses a Spark cluster.</p> <p>Delta Lake is flexible to allow these types of operations from multiple readers and writers.  This provides teams with the flexibility to choose the right tool for the job.</p>"},{"location":"why-use-delta-lake/#support-for-many-languages","title":"Support for many languages","text":"<p>Delta tables can be queried with a variety of different languages.  This project provides APIs for Rust and Python users and does not depend on Java or Scala.  This project is a great alternative for pandas, Polars, DuckDB, or DataFusion.</p> <p>Delta Lake supports many languages and even more language support is coming soon!</p>"},{"location":"why-use-delta-lake/#support-on-multiple-clouds","title":"Support on multiple clouds","text":"<p>Delta Lake supports multiple clouds including GCP, AWS, and Azure.</p> <p>You can also use Delta Lake on your local machine or in an on-prem environment.</p> <p>Delta Lake is quite portable.</p>"},{"location":"why-use-delta-lake/#conclusion","title":"Conclusion","text":"<p>Delta Lake is a mature table format that offers users tons of advantages over a data lake with virtually no downsides.</p> <p>Once you start using Delta Lake, you will never want to go back to data lakes that expose you to a variety of dangerous bugs, poor performance, and reliability issues.</p> <p>The Delta Lake community is also welcoming and open.  
We gladly accept new contributors and help users with their questions.</p>"},{"location":"api/catalog/","title":"Catalog","text":"","boost":2},{"location":"api/catalog/#deltalake.data_catalog.DataCatalog","title":"deltalake.data_catalog.DataCatalog","text":"<p>             Bases: <code>Enum</code></p> <p>List of the Data Catalogs</p>","boost":2},{"location":"api/catalog/#deltalake.data_catalog.DataCatalog.AWS","title":"AWS  <code>class-attribute</code> <code>instance-attribute</code>","text":"<pre><code>AWS = 'glue'\n</code></pre> <p>Refers to the <code>AWS Glue Data Catalog &lt;https://docs.aws.amazon.com/glue/latest/dg/catalog-and-crawler.html&gt;</code>_</p>","boost":2},{"location":"api/catalog/#deltalake.data_catalog.DataCatalog.UNITY","title":"UNITY  <code>class-attribute</code> <code>instance-attribute</code>","text":"<pre><code>UNITY = 'unity'\n</code></pre> <p>Refers to the <code>Databricks Unity Catalog &lt;https://docs.databricks.com/data-governance/unity-catalog/index.html&gt;</code>_</p>","boost":2},{"location":"api/delta_writer/","title":"Writer","text":"","boost":10},{"location":"api/delta_writer/#write-to-delta-tables","title":"Write to Delta Tables","text":"","boost":10},{"location":"api/delta_writer/#deltalake.write_deltalake","title":"deltalake.write_deltalake","text":"<pre><code>write_deltalake(table_or_uri: Union[str, Path, DeltaTable], data: Union[pd.DataFrame, ds.Dataset, pa.Table, pa.RecordBatch, Iterable[pa.RecordBatch], RecordBatchReader], *, schema: Optional[Union[pa.Schema, DeltaSchema]] = None, partition_by: Optional[Union[List[str], str]] = None, filesystem: Optional[pa_fs.FileSystem] = None, mode: Literal['error', 'append', 'overwrite', 'ignore'] = 'error', file_options: Optional[ds.ParquetFileWriteOptions] = None, max_partitions: Optional[int] = None, max_open_files: int = 1024, max_rows_per_file: int = 10 * 1024 * 1024, min_rows_per_group: int = 64 * 1024, max_rows_per_group: int = 128 * 1024, name: Optional[str] = None, description: 
Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, overwrite_schema: bool = False, storage_options: Optional[Dict[str, str]] = None, partition_filters: Optional[List[Tuple[str, str, Any]]] = None, predicate: Optional[str] = None, large_dtypes: bool = False, engine: Literal['pyarrow', 'rust'] = 'pyarrow', writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; None\n</code></pre> <p>Write to a Delta Lake table</p> <p>If the table does not already exist, it will be created.</p> <p>This function only supports writer protocol version 2 currently. When attempting to write to an existing table with a higher min_writer_version, this function will throw DeltaProtocolError.</p> <p>Note that this function does NOT register this table in a data catalog.</p> <p>A locking mechanism is needed to prevent unsafe concurrent writes to a delta lake directory when writing to S3. DynamoDB is the only available locking provider at the moment in delta-rs. To enable DynamoDB as the locking provider, you need to set the <code>AWS_S3_LOCKING_PROVIDER</code> to 'dynamodb' as a storage_option or as an environment variable.</p> <p>Additionally, you must create a DynamoDB table with the name 'delta_rs_lock_table' so that it can be automatically discovered by delta-rs. Alternatively, you can use a table name of your choice, but you must set the <code>DELTA_DYNAMO_TABLE_NAME</code> variable to match your chosen table name. 
The required schema for the DynamoDB table is as follows:</p> <ul> <li>Key Schema: AttributeName=key, KeyType=HASH</li> <li>Attribute Definitions: AttributeName=key, AttributeType=S</li> </ul> <p>Please note that this locking mechanism is not compatible with any other locking mechanisms, including the one used by Spark.</p> <p>Parameters:</p> Name Type Description Default <code>table_or_uri</code> <code>Union[str, Path, DeltaTable]</code> <p>URI of a table or a DeltaTable object.</p> required <code>data</code> <code>Union[DataFrame, Dataset, Table, RecordBatch, Iterable[RecordBatch], RecordBatchReader]</code> <p>Data to write. If passing iterable, the schema must also be given.</p> required <code>schema</code> <code>Optional[Union[Schema, Schema]]</code> <p>Optional schema to write.</p> <code>None</code> <code>partition_by</code> <code>Optional[Union[List[str], str]]</code> <p>List of columns to partition the table by. Only required when creating a new table.</p> <code>None</code> <code>filesystem</code> <code>Optional[FileSystem]</code> <p>Optional filesystem to pass to PyArrow. If not provided will be inferred from uri. The file system has to be rooted in the table root. Use the pyarrow.fs.SubTreeFileSystem, to adopt the root of pyarrow file systems.</p> <code>None</code> <code>mode</code> <code>Literal['error', 'append', 'overwrite', 'ignore']</code> <p>How to handle existing data. Default is to error if table already exists. If 'append', will add new data. If 'overwrite', will replace table with new data. If 'ignore', will not write anything if table already exists.</p> <code>'error'</code> <code>file_options</code> <code>Optional[ParquetFileWriteOptions]</code> <p>Optional write options for Parquet (ParquetFileWriteOptions). Can be provided with defaults using ParquetFileWriteOptions().make_write_options(). Please refer to https://github.com/apache/arrow/blob/master/python/pyarrow/_dataset_parquet.pyx#L492-L533 for the list of available options. 
Only used in pyarrow engine.</p> <code>None</code> <code>max_partitions</code> <code>Optional[int]</code> <p>the maximum number of partitions that will be used. Only used in pyarrow engine.</p> <code>None</code> <code>max_open_files</code> <code>int</code> <p>Limits the maximum number of files that can be left open while writing. If an attempt is made to open too many files then the least recently used file will be closed. If this setting is set too low you may end up fragmenting your data into many small files. Only used in pyarrow engine.</p> <code>1024</code> <code>max_rows_per_file</code> <code>int</code> <p>Maximum number of rows per file. If greater than 0 then this will limit how many rows are placed in any single file. Otherwise there will be no limit and one file will be created in each output directory unless files need to be closed to respect max_open_files min_rows_per_group: Minimum number of rows per group. When the value is set, the dataset writer will batch incoming data and only write the row groups to the disk when sufficient rows have accumulated. Only used in pyarrow engine.</p> <code>10 * 1024 * 1024</code> <code>max_rows_per_group</code> <code>int</code> <p>Maximum number of rows per group. If the value is set, then the dataset writer may split up large incoming batches into multiple row groups. 
If this value is set, then min_rows_per_group should also be set.</p> <code>128 * 1024</code> <code>name</code> <code>Optional[str]</code> <p>User-provided identifier for this table.</p> <code>None</code> <code>description</code> <code>Optional[str]</code> <p>User-provided description for this table.</p> <code>None</code> <code>configuration</code> <code>Optional[Mapping[str, Optional[str]]]</code> <p>A map containing configuration options for the metadata action.</p> <code>None</code> <code>overwrite_schema</code> <code>bool</code> <p>If True, allows updating the schema of the table.</p> <code>False</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>options passed to the native delta filesystem. Unused if 'filesystem' is defined.</p> <code>None</code> <code>predicate</code> <code>Optional[str]</code> <p>When using <code>Overwrite</code> mode, replace data that matches a predicate. Only used in rust engine.</p> <code>None</code> <code>partition_filters</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>the partition filters that will be used for partition overwrite. Only used in pyarrow engine.</p> <code>None</code> <code>large_dtypes</code> <code>bool</code> <p>If True, the data schema is kept in large_dtypes, has no effect on pandas dataframe input.</p> <code>False</code> <code>engine</code> <code>Literal['pyarrow', 'rust']</code> <p>writer engine to write the delta table. 
<code>Rust</code> engine is still experimental but you may see up to 4x performance improvements over pyarrow.</p> <code>'pyarrow'</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>Custom metadata to add to the commitInfo.</p> <code>None</code>","boost":10},{"location":"api/delta_writer/#deltalake.WriterProperties","title":"deltalake.WriterProperties  <code>dataclass</code>","text":"<pre><code>WriterProperties(data_page_size_limit: Optional[int] = None, dictionary_page_size_limit: Optional[int] = None, data_page_row_count_limit: Optional[int] = None, write_batch_size: Optional[int] = None, max_row_group_size: Optional[int] = None, compression: Optional[Literal['UNCOMPRESSED', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD', 'LZ4_RAW']] = None, compression_level: Optional[int] = None)\n</code></pre> <p>A Writer Properties instance for the Rust parquet writer.</p> <p>Create a Writer Properties instance for the Rust parquet writer:</p> <p>Parameters:</p> Name Type Description Default <code>data_page_size_limit</code> <code>Optional[int]</code> <p>Limit DataPage size to this in bytes.</p> <code>None</code> <code>dictionary_page_size_limit</code> <code>Optional[int]</code> <p>Limit the size of each DataPage to store dicts to this amount in bytes.</p> <code>None</code> <code>data_page_row_count_limit</code> <code>Optional[int]</code> <p>Limit the number of rows in each DataPage.</p> <code>None</code> <code>write_batch_size</code> <code>Optional[int]</code> <p>Splits internally to smaller batch size.</p> <code>None</code> <code>max_row_group_size</code> <code>Optional[int]</code> <p>Max number of rows in row group.</p> <code>None</code> <code>compression</code> <code>Optional[Literal['UNCOMPRESSED', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD', 'LZ4_RAW']]</code> <p>compression type.</p> <code>None</code> 
<code>compression_level</code> <code>Optional[int]</code> <p>If none and compression has a level, the default level will be used, only relevant for GZIP: levels (1-9), BROTLI: levels (1-11), ZSTD: levels (1-22),</p> <code>None</code>","boost":10},{"location":"api/delta_writer/#convert-to-delta-tables","title":"Convert to Delta Tables","text":"","boost":10},{"location":"api/delta_writer/#deltalake.convert_to_deltalake","title":"deltalake.convert_to_deltalake","text":"<pre><code>convert_to_deltalake(uri: Union[str, Path], mode: Literal['error', 'ignore'] = 'error', partition_by: Optional[pa.Schema] = None, partition_strategy: Optional[Literal['hive']] = None, name: Optional[str] = None, description: Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, storage_options: Optional[Dict[str, str]] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; None\n</code></pre> <p><code>Convert</code> parquet tables <code>to delta</code> tables.</p> <p>Currently only HIVE partitioned tables are supported. <code>Convert to delta</code> creates a transaction log commit with add actions, and additional properties provided such as configuration, name, and description.</p> <p>Parameters:</p> Name Type Description Default <code>uri</code> <code>Union[str, Path]</code> <p>URI of a table.</p> required <code>partition_by</code> <code>Optional[Schema]</code> <p>Optional partitioning schema if table is partitioned.</p> <code>None</code> <code>partition_strategy</code> <code>Optional[Literal['hive']]</code> <p>Optional partition strategy to read and convert</p> <code>None</code> <code>mode</code> <code>Literal['error', 'ignore']</code> <p>How to handle existing data. Default is to error if table already exists. 
If 'ignore', will not convert anything if table already exists.</p> <code>'error'</code> <code>name</code> <code>Optional[str]</code> <p>User-provided identifier for this table.</p> <code>None</code> <code>description</code> <code>Optional[str]</code> <p>User-provided description for this table.</p> <code>None</code> <code>configuration</code> <code>Optional[Mapping[str, Optional[str]]]</code> <p>A map containing configuration options for the metadata action.</p> <code>None</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>options passed to the native delta filesystem. Unused if 'filesystem' is defined.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit</p> <code>None</code>","boost":10},{"location":"api/exceptions/","title":"Exceptions","text":"","boost":2},{"location":"api/exceptions/#deltalake.exceptions.DeltaError","title":"deltalake.exceptions.DeltaError","text":"<p>             Bases: <code>builtins.Exception</code></p> <p>The base class for Delta-specific errors.</p>","boost":2},{"location":"api/exceptions/#deltalake.exceptions.DeltaProtocolError","title":"deltalake.exceptions.DeltaProtocolError","text":"<p>             Bases: <code>_internal.DeltaError</code></p> <p>Raised when a violation of the Delta protocol specs occurred.</p>","boost":2},{"location":"api/exceptions/#deltalake.exceptions.TableNotFoundError","title":"deltalake.exceptions.TableNotFoundError","text":"<p>             Bases: <code>_internal.DeltaError</code></p> <p>Raised when a Delta table cannot be loaded from a location.</p>","boost":2},{"location":"api/exceptions/#deltalake.exceptions.CommitFailedError","title":"deltalake.exceptions.CommitFailedError","text":"<p>             Bases: <code>_internal.DeltaError</code></p> <p>Raised when a commit to a Delta table 
fails.</p>","boost":2},{"location":"api/schema/","title":"Schema","text":"","boost":2},{"location":"api/schema/#schema-and-field","title":"Schema and field","text":"<p>Schemas, fields, and data types are provided in the <code>deltalake.schema</code> submodule.</p>","boost":2},{"location":"api/schema/#deltalake.Schema","title":"deltalake.Schema","text":"<pre><code>Schema(fields: List[Field])\n</code></pre> <p>             Bases: <code>deltalake._internal.StructType</code></p> <p>A Delta Lake schema</p> <p>Create using a list of :class:<code>Field</code>:</p> <p>Schema([Field(\"x\", \"integer\"), Field(\"y\", \"string\")]) Schema([Field(x, PrimitiveType(\"integer\"), nullable=True), Field(y, PrimitiveType(\"string\"), nullable=True)])</p> <p>Or create from a PyArrow schema:</p> <p>import pyarrow as pa Schema.from_pyarrow(pa.schema({\"x\": pa.int32(), \"y\": pa.string()})) Schema([Field(x, PrimitiveType(\"integer\"), nullable=True), Field(y, PrimitiveType(\"string\"), nullable=True)])</p>","boost":2},{"location":"api/schema/#deltalake.Schema.invariants","title":"invariants","text":"<pre><code>invariants: List[Tuple[str, str]] = &lt;attribute 'invariants' of 'deltalake._internal.Schema' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Schema.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(schema_json) -&gt; Schema\n</code></pre> <p>Create a new Schema from a JSON string.</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required Example <p>A schema has the same JSON format as a StructType. 
<pre><code>Schema.from_json('''{\n    \"type\": \"struct\",\n    \"fields\": [{\"name\": \"x\", \"type\": \"integer\", \"nullable\": true, \"metadata\": {}}]\n    }'''\n)\n# Returns Schema([Field(x, PrimitiveType(\"integer\"), nullable=True)])\n</code></pre></p>","boost":2},{"location":"api/schema/#deltalake.Schema.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; Schema\n</code></pre> <p>Create a Schema from a PyArrow Schema type</p> <p>Will raise <code>TypeError</code> if the PyArrow type is not a primitive type.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>Schema</code> <p>A PyArrow Schema</p> required <p>Returns:</p> Type Description <code>Schema</code> <p>a Schema</p>","boost":2},{"location":"api/schema/#deltalake.Schema.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the JSON string representation of the Schema.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p> Example <p>A schema has the same JSON format as a StructType. <pre><code>Schema([Field(\"x\", \"integer\")]).to_json()\n# Returns '{\"type\":\"struct\",\"fields\":[{\"name\":\"x\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}'\n</code></pre></p>","boost":2},{"location":"api/schema/#deltalake.Schema.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow(as_large_types: bool = False) -&gt; pyarrow.Schema\n</code></pre> <p>Return equivalent PyArrow schema</p> <p>Parameters:</p> Name Type Description Default <code>as_large_types</code> <code>bool</code> <p>get schema with all variable size types (list, binary, string) as large variants (with int64 indices). 
This is for compatibility with systems like Polars that only support the large versions of Arrow types.</p> <code>False</code> <p>Returns:</p> Type Description <code>Schema</code> <p>a PyArrow Schema</p>","boost":2},{"location":"api/schema/#deltalake.Field","title":"deltalake.Field","text":"<pre><code>Field(name: str, type: DataType, *, nullable: bool = True, metadata: Optional[Dict[str, Any]] = None)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.metadata","title":"metadata","text":"<pre><code>metadata: Dict[str, Any] = &lt;attribute 'metadata' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.name","title":"name","text":"<pre><code>name: str = &lt;attribute 'name' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.nullable","title":"nullable","text":"<pre><code>nullable: bool = &lt;attribute 'nullable' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.type","title":"type","text":"<pre><code>type: DataType = &lt;attribute 'type' of 'deltalake._internal.Field' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(field_json) -&gt; Field\n</code></pre> <p>Create a Field from a JSON string.</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>the JSON string.</p> required <p>Returns:</p> Type Description <code>Field</code> <p>Field</p> Example <pre><code>Field.from_json('''{\n        \"name\": \"col\",\n        \"type\": \"integer\",\n        \"nullable\": true,\n        \"metadata\": {}\n    }'''\n)\n# Returns Field(col, PrimitiveType(\"integer\"), nullable=True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.from_pyarrow","title":"from_pyarrow  
<code>staticmethod</code>","text":"<pre><code>from_pyarrow(field: pyarrow.Field) -&gt; Field\n</code></pre> <p>Create a Field from a PyArrow field Note: This currently doesn't preserve field metadata.</p> <p>Parameters:</p> Name Type Description Default <code>field</code> <code>Field</code> <p>a PyArrow Field</p> required <p>Returns:</p> Type Description <code>Field</code> <p>a Field</p>","boost":2},{"location":"api/schema/#deltalake.Field.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the field as JSON string.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p> Example <pre><code>Field(\"col\", \"integer\").to_json()\n# Returns '{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}'\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.Field.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.Field\n</code></pre> <p>Convert to an equivalent PyArrow field Note: This currently doesn't preserve field metadata.</p> <p>Returns:</p> Type Description <code>Field</code> <p>a pyarrow Field</p>","boost":2},{"location":"api/schema/#data-types","title":"Data types","text":"","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType","title":"deltalake.schema.PrimitiveType","text":"<pre><code>PrimitiveType(data_type: str)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.type","title":"type","text":"<pre><code>type: str = &lt;attribute 'type' of 'deltalake._internal.PrimitiveType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; PrimitiveType\n</code></pre> <p>Create a PrimitiveType from a JSON string</p> <p>The JSON representation for a primitive type is just a quoted string: 
<code>PrimitiveType.from_json('\"integer\"')</code></p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>PrimitiveType</code> <p>a PrimitiveType type</p>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; PrimitiveType\n</code></pre> <p>Create a PrimitiveType from a PyArrow datatype</p> <p>Will raise <code>TypeError</code> if the PyArrow type is not a primitive type.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>DataType</code> <p>A PyArrow DataType</p> required <p>Returns:</p> Type Description <code>PrimitiveType</code> <p>a PrimitiveType</p>","boost":2},{"location":"api/schema/#deltalake.schema.PrimitiveType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.DataType\n</code></pre> <p>Get the equivalent PyArrow type (pyarrow.DataType)</p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType","title":"deltalake.schema.ArrayType","text":"<pre><code>ArrayType(element_type: DataType, *, contains_null: bool = True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.contains_null","title":"contains_null","text":"<pre><code>contains_null: bool = &lt;attribute 'contains_null' of 'deltalake._internal.ArrayType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.element_type","title":"element_type","text":"<pre><code>element_type: DataType = &lt;attribute 'element_type' of 'deltalake._internal.ArrayType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.type","title":"type","text":"<pre><code>type: Literal['array'] = &lt;attribute 'type' of 'deltalake._internal.ArrayType' 
objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; ArrayType\n</code></pre> <p>Create an ArrayType from a JSON string</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>ArrayType</code> <p>an ArrayType</p> Example <p>The JSON representation for an array type is an object with <code>type</code> (set to <code>\"array\"</code>), <code>elementType</code>, and <code>containsNull</code>. <pre><code>ArrayType.from_json(\n    '''{\n        \"type\": \"array\",\n        \"elementType\": \"integer\",\n        \"containsNull\": false\n    }'''\n)\n# Returns ArrayType(PrimitiveType(\"integer\"), contains_null=False)\n</code></pre></p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; ArrayType\n</code></pre> <p>Create an ArrayType from a pyarrow.ListType.</p> <p>Will raise <code>TypeError</code> if a different PyArrow DataType is provided.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>ListType</code> <p>The PyArrow ListType</p> required <p>Returns:</p> Type Description <code>ArrayType</code> <p>an ArrayType</p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the JSON string representation of the type.</p>","boost":2},{"location":"api/schema/#deltalake.schema.ArrayType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.ListType\n</code></pre> <p>Get the equivalent PyArrow 
type.</p>","boost":2},{"location":"api/schema/#deltalake.schema.MapType","title":"deltalake.schema.MapType","text":"<pre><code>MapType(key_type: DataType, value_type: DataType, *, value_contains_null: bool = True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.key_type","title":"key_type","text":"<pre><code>key_type: DataType = &lt;attribute 'key_type' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.type","title":"type","text":"<pre><code>type: Literal['map'] = &lt;attribute 'type' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.value_contains_null","title":"value_contains_null","text":"<pre><code>value_contains_null: bool = &lt;attribute 'value_contains_null' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.value_type","title":"value_type","text":"<pre><code>value_type: DataType = &lt;attribute 'value_type' of 'deltalake._internal.MapType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; MapType\n</code></pre> <p>Create a MapType from a JSON string</p> <p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>MapType</code> <p>a MapType</p> Example <p>The JSON representation for a map type is an object with <code>type</code> (set to <code>map</code>), <code>keyType</code>, <code>valueType</code>, and <code>valueContainsNull</code>:</p> <pre><code>MapType.from_json(\n    '''{\n        \"type\": \"map\",\n        \"keyType\": \"integer\",\n        \"valueType\": \"string\",\n        \"valueContainsNull\": true\n    }'''\n)\n# Returns MapType(PrimitiveType(\"integer\"), 
PrimitiveType(\"string\"), value_contains_null=True)\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; MapType\n</code></pre> <p>Create a MapType from a PyArrow MapType.</p> <p>Will raise <code>TypeError</code> if passed a different type.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>MapType</code> <p>the PyArrow MapType</p> required <p>Returns:</p> Type Description <code>MapType</code> <p>a MapType</p>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get JSON string representation of map type.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p>","boost":2},{"location":"api/schema/#deltalake.schema.MapType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.MapType\n</code></pre> <p>Get the equivalent PyArrow data type.</p>","boost":2},{"location":"api/schema/#deltalake.schema.StructType","title":"deltalake.schema.StructType","text":"<pre><code>StructType(fields: List[Field])\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.fields","title":"fields","text":"<pre><code>fields: List[Field] = &lt;attribute 'fields' of 'deltalake._internal.StructType' objects&gt;\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.type","title":"type","text":"<pre><code>type: Literal['struct'] = &lt;attribute 'type' of 'deltalake._internal.StructType' objects&gt;\n</code></pre> <p>The string \"struct\"</p>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.from_json","title":"from_json  <code>staticmethod</code>","text":"<pre><code>from_json(type_json) -&gt; StructType\n</code></pre> <p>Create a new StructType from a JSON string.</p> 
<p>Parameters:</p> Name Type Description Default <code>json</code> <code>str</code> <p>a JSON string</p> required <p>Returns:</p> Type Description <code>StructType</code> <p>a StructType</p> Example <pre><code>StructType.from_json(\n    '''{\n        \"type\": \"struct\",\n        \"fields\": [{\"name\": \"x\", \"type\": \"integer\", \"nullable\": true, \"metadata\": {}}]\n    }'''\n)\n# Returns StructType([Field(x, PrimitiveType(\"integer\"), nullable=True)])\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.from_pyarrow","title":"from_pyarrow  <code>staticmethod</code>","text":"<pre><code>from_pyarrow(data_type) -&gt; StructType\n</code></pre> <p>Create a new StructType from a PyArrow struct type.</p> <p>Will raise <code>TypeError</code> if a different data type is provided.</p> <p>Parameters:</p> Name Type Description Default <code>type</code> <code>StructType</code> <p>a PyArrow struct type.</p> required <p>Returns:</p> Type Description <code>StructType</code> <p>a StructType</p>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.to_json","title":"to_json  <code>method descriptor</code>","text":"<pre><code>to_json() -&gt; str\n</code></pre> <p>Get the JSON representation of the type.</p> <p>Returns:</p> Type Description <code>str</code> <p>a JSON string</p> Example <pre><code>StructType([Field(\"x\", \"integer\")]).to_json()\n# Returns '{\"type\":\"struct\",\"fields\":[{\"name\":\"x\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}'\n</code></pre>","boost":2},{"location":"api/schema/#deltalake.schema.StructType.to_pyarrow","title":"to_pyarrow  <code>method descriptor</code>","text":"<pre><code>to_pyarrow() -&gt; pyarrow.StructType\n</code></pre> <p>Get the equivalent PyArrow StructType</p> <p>Returns:</p> Type Description <code>StructType</code> <p>a PyArrow StructType</p>","boost":2},{"location":"api/storage/","title":"Storage","text":"<p>The delta filesystem handler for the pyarrow engine 
writer.</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler","title":"deltalake.fs.DeltaStorageHandler","text":"<pre><code>DeltaStorageHandler(root: str, options: dict[str, str] | None = None, known_sizes: dict[str, int] | None = None)\n</code></pre> <p>             Bases: <code>DeltaFileSystemHandler</code>, <code>FileSystemHandler</code></p> <p>DeltaStorageHandler is a concrete implementation of a PyArrow FileSystemHandler.</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.get_file_info_selector","title":"get_file_info_selector","text":"<pre><code>get_file_info_selector(selector: FileSelector) -&gt; List[FileInfo]\n</code></pre> <p>Get info for the files defined by FileSelector.</p> <p>Parameters:</p> Name Type Description Default <code>selector</code> <code>FileSelector</code> <p>FileSelector object</p> required <p>Returns:</p> Type Description <code>List[FileInfo]</code> <p>list of file info objects</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.open_input_file","title":"open_input_file","text":"<pre><code>open_input_file(path: str) -&gt; pa.PythonFile\n</code></pre> <p>Open an input file for random access reading.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>str</code> <p>The source to open for reading.</p> required <p>Returns:</p> Type Description <code>PythonFile</code> <p>NativeFile</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.open_input_stream","title":"open_input_stream","text":"<pre><code>open_input_stream(path: str) -&gt; pa.PythonFile\n</code></pre> <p>Open an input stream for sequential reading.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>str</code> <p>The source to open for reading.</p> required <p>Returns:</p> Type Description <code>PythonFile</code> 
<p>NativeFile</p>","boost":2},{"location":"api/storage/#deltalake.fs.DeltaStorageHandler.open_output_stream","title":"open_output_stream","text":"<pre><code>open_output_stream(path: str, metadata: Optional[Dict[str, str]] = None) -&gt; pa.PythonFile\n</code></pre> <p>Open an output stream for sequential writing.</p> <p>If the target already exists, existing data is truncated.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>str</code> <p>The source to open for writing.</p> required <code>metadata</code> <code>Optional[Dict[str, str]]</code> <p>If not None, a mapping of string keys to string values.</p> <code>None</code> <p>Returns:</p> Type Description <code>PythonFile</code> <p>NativeFile</p>","boost":2},{"location":"api/delta_table/","title":"DeltaTable","text":"","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable","title":"deltalake.DeltaTable  <code>dataclass</code>","text":"<pre><code>DeltaTable(table_uri: Union[str, Path, os.PathLike[str]], version: Optional[int] = None, storage_options: Optional[Dict[str, str]] = None, without_files: bool = False, log_buffer_size: Optional[int] = None)\n</code></pre> <p>Represents a Delta Table</p> <p>Create the Delta Table from a path with an optional version. Multiple StorageBackends are currently supported: AWS S3, Azure Data Lake Storage Gen2, Google Cloud Storage (GCS) and local URI. 
Depending on the storage backend used, you could provide options values using the <code>storage_options</code> parameter.</p> <p>Parameters:</p> Name Type Description Default <code>table_uri</code> <code>Union[str, Path, PathLike[str]]</code> <p>the path of the DeltaTable</p> required <code>version</code> <code>Optional[int]</code> <p>version of the DeltaTable</p> <code>None</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>a dictionary of the options to use for the storage backend</p> <code>None</code> <code>without_files</code> <code>bool</code> <p>If True, will load table without tracking files.                 Some append-only applications might have no need of tracking any files. So, the                 DeltaTable will be loaded with a significant memory reduction.</p> <code>False</code> <code>log_buffer_size</code> <code>Optional[int]</code> <p>Number of files to buffer when reading the commit log. A positive integer.                 Setting a value greater than 1 results in concurrent calls to the storage api.                 This can decrease latency if there are many files in the log since the last checkpoint,                 but will also increase memory usage. Possible rate limits of the storage backend should                 also be considered for optimal performance. 
Defaults to 4 * number of cpus.</p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.alter","title":"alter  <code>property</code>","text":"<pre><code>alter: TableAlterer\n</code></pre> <p>Namespace for all table alter related methods.</p> <p>Returns:</p> Name Type Description <code>TableAlterer</code> <code>TableAlterer</code> <p>TableAlterer Object</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.optimize","title":"optimize  <code>property</code>","text":"<pre><code>optimize: TableOptimizer\n</code></pre> <p>Namespace for all table optimize related methods.</p> <p>Returns:</p> Name Type Description <code>TableOptimizer</code> <code>TableOptimizer</code> <p>TableOptimizer Object</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.cleanup_metadata","title":"cleanup_metadata","text":"<pre><code>cleanup_metadata() -&gt; None\n</code></pre> <p>Delete expired log files before current version from table. The table log retention is based on the <code>configuration.logRetentionDuration</code> value, 30 days by default.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.create","title":"create  <code>classmethod</code>","text":"<pre><code>create(table_uri: Union[str, Path], schema: Union[pyarrow.Schema, DeltaSchema], mode: Literal['error', 'append', 'overwrite', 'ignore'] = 'error', partition_by: Optional[Union[List[str], str]] = None, name: Optional[str] = None, description: Optional[str] = None, configuration: Optional[Mapping[str, Optional[str]]] = None, storage_options: Optional[Dict[str, str]] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; DeltaTable\n</code></pre> <p><code>CREATE</code> or <code>CREATE_OR_REPLACE</code> a delta table given a table_uri.</p> <p>Parameters:</p> Name Type Description Default <code>table_uri</code> <code>Union[str, Path]</code> <p>URI of a table</p> required <code>schema</code> <code>Union[Schema, Schema]</code> <p>Table schema</p> required 
<code>mode</code> <code>Literal['error', 'append', 'overwrite', 'ignore']</code> <p>How to handle existing data. Default is to error if table already exists. If 'append', returns a not-supported error if table exists. If 'overwrite', will <code>CREATE_OR_REPLACE</code> table. If 'ignore', will not do anything if table already exists. Defaults to \"error\".</p> <code>'error'</code> <code>partition_by</code> <code>Optional[Union[List[str], str]]</code> <p>List of columns to partition the table by.</p> <code>None</code> <code>name</code> <code>Optional[str]</code> <p>User-provided identifier for this table.</p> <code>None</code> <code>description</code> <code>Optional[str]</code> <p>User-provided description for this table.</p> <code>None</code> <code>configuration</code> <code>Optional[Mapping[str, Optional[str]]]</code> <p>A map containing configuration options for the metadata action.</p> <code>None</code> <code>storage_options</code> <code>Optional[Dict[str, str]]</code> <p>options passed to the object store crate.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>DeltaTable</code> <code>DeltaTable</code> <p>created delta table</p> Example <pre><code>import pyarrow as pa\n\nfrom deltalake import DeltaTable\n\ndt = DeltaTable.create(\n    table_uri=\"my_local_table\",\n    schema=pa.schema(\n        [pa.field(\"foo\", pa.string()), pa.field(\"bar\", pa.string())]\n    ),\n    mode=\"error\",\n    partition_by=\"bar\",\n)\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.delete","title":"delete","text":"<pre><code>delete(predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Delete records from a Delta Table that satisfy a predicate.</p> <p>When a 
predicate is not provided then all records are deleted from the Delta Table. Otherwise a scan of the Delta table is performed to mark any files that contain records that satisfy the predicate. Once files are determined they are rewritten without the records.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>a SQL where clause. If not passed, will delete all rows.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from delete.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.file_uris","title":"file_uris","text":"<pre><code>file_uris(partition_filters: Optional[List[Tuple[str, str, Any]]] = None) -&gt; List[str]\n</code></pre> <p>Get the list of files as absolute URIs, including the scheme (e.g. \"s3://\").</p> <p>Local files will be just plain absolute paths, without a scheme. (That is, no 'file://' prefix.)</p> <p>Use the partition_filters parameter to retrieve a subset of files that match the given filters.</p> <p>Parameters:</p> Name Type Description Default <code>partition_filters</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>the partition filters that will be used for getting the matched files</p> <code>None</code> <p>Returns:</p> Type Description <code>List[str]</code> <p>list of the .parquet files with an absolute URI referenced for the current version of the DeltaTable</p> <p>Predicates are expressed in disjunctive normal form (DNF), like [(\"x\", \"=\", \"a\"), ...]. DNF allows arbitrary boolean logical combinations of single partition predicates. The innermost tuples each describe a single partition predicate. 
The list of inner predicates is interpreted as a conjunction (AND), forming a more selective and multiple partition predicates. Each tuple has format: (key, op, value) and compares the key with the value. The supported op are: <code>=</code>, <code>!=</code>, <code>in</code>, and <code>not in</code>. If the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string <code>''</code> for Null partition value.</p> Example <pre><code>(\"x\", \"=\", \"a\")\n(\"x\", \"!=\", \"a\")\n(\"y\", \"in\", [\"a\", \"b\", \"c\"])\n(\"z\", \"not in\", [\"a\",\"b\"])\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.files","title":"files","text":"<pre><code>files(partition_filters: Optional[List[Tuple[str, str, Any]]] = None) -&gt; List[str]\n</code></pre> <p>Get the .parquet files of the DeltaTable.</p> <p>The paths are as they are saved in the delta log, which may either be relative to the table root or absolute URIs.</p> <p>Parameters:</p> Name Type Description Default <code>partition_filters</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>the partition filters that will be used for                 getting the matched files</p> <code>None</code> <p>Returns:</p> Type Description <code>List[str]</code> <p>list of the .parquet files referenced for the current version of the DeltaTable</p> <p>Predicates are expressed in disjunctive normal form (DNF), like [(\"x\", \"=\", \"a\"), ...]. DNF allows arbitrary boolean logical combinations of single partition predicates. The innermost tuples each describe a single partition predicate. The list of inner predicates is interpreted as a conjunction (AND), forming a more selective and multiple partition predicates. Each tuple has format: (key, op, value) and compares the key with the value. The supported op are: <code>=</code>, <code>!=</code>, <code>in</code>, and <code>not in</code>. 
If the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string <code>''</code> for Null partition value.</p> Example <pre><code>(\"x\", \"=\", \"a\")\n(\"x\", \"!=\", \"a\")\n(\"y\", \"in\", [\"a\", \"b\", \"c\"])\n(\"z\", \"not in\", [\"a\",\"b\"])\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.from_data_catalog","title":"from_data_catalog  <code>classmethod</code>","text":"<pre><code>from_data_catalog(data_catalog: DataCatalog, database_name: str, table_name: str, data_catalog_id: Optional[str] = None, version: Optional[int] = None, log_buffer_size: Optional[int] = None) -&gt; DeltaTable\n</code></pre> <p>Create the Delta Table from a Data Catalog.</p> <p>Parameters:</p> Name Type Description Default <code>data_catalog</code> <code>DataCatalog</code> <p>the Catalog to use for getting the storage location of the Delta Table</p> required <code>database_name</code> <code>str</code> <p>the database name inside the Data Catalog</p> required <code>table_name</code> <code>str</code> <p>the table name inside the Data Catalog</p> required <code>data_catalog_id</code> <code>Optional[str]</code> <p>the identifier of the Data Catalog</p> <code>None</code> <code>version</code> <code>Optional[int]</code> <p>version of the DeltaTable</p> <code>None</code> <code>log_buffer_size</code> <code>Optional[int]</code> <p>Number of files to buffer when reading the commit log. A positive integer.                 Setting a value greater than 1 results in concurrent calls to the storage api.                 This can decrease latency if there are many files in the log since the last checkpoint,                 but will also increase memory usage. Possible rate limits of the storage backend should                 also be considered for optimal performance. 
Defaults to 4 * number of cpus.</p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.get_add_actions","title":"get_add_actions","text":"<pre><code>get_add_actions(flatten: bool = False) -&gt; pyarrow.RecordBatch\n</code></pre> <p>Return a dataframe with all current add actions.</p> <p>Add actions represent the files that currently make up the table. This data is a low-level representation parsed from the transaction log.</p> <p>Parameters:</p> Name Type Description Default <code>flatten</code> <code>bool</code> <p>whether to flatten the schema. Partition values columns are         given the prefix <code>partition.</code>, statistics (null_count, min, and max) are         given the prefix <code>null_count.</code>, <code>min.</code>, and <code>max.</code>, and tags the         prefix <code>tags.</code>. Nested field names are concatenated with <code>.</code>.</p> <code>False</code> <p>Returns:</p> Type Description <code>RecordBatch</code> <p>a PyArrow RecordBatch containing the add action data.</p> Example <pre><code>from pprint import pprint\nfrom deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data, partition_by=[\"x\"])\ndt = DeltaTable(\"tmp\")\ndf = dt.get_add_actions().to_pandas()\ndf[\"path\"].sort_values(ignore_index=True)\n0    x=1/0\n1    x=2/0\n2    x=3/0\n</code></pre> <pre><code>df = dt.get_add_actions(flatten=True).to_pandas()\ndf[\"partition.x\"].sort_values(ignore_index=True)\n0    1\n1    2\n2    3\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.history","title":"history","text":"<pre><code>history(limit: Optional[int] = None) -&gt; List[Dict[str, Any]]\n</code></pre> <p>Run the history command on the DeltaTable. 
The operations are returned in reverse chronological order.</p> <p>Parameters:</p> Name Type Description Default <code>limit</code> <code>Optional[int]</code> <p>the commit info limit to return</p> <code>None</code> <p>Returns:</p> Type Description <code>List[Dict[str, Any]]</code> <p>list of the commit infos registered in the transaction log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.load_as_version","title":"load_as_version","text":"<pre><code>load_as_version(version: Union[int, str, datetime]) -&gt; None\n</code></pre> <p>Load/time travel a DeltaTable to a specified version number, or a timestamp version of the table. If a string is passed then the argument should be an RFC 3339 and ISO 8601 date and time string format.</p> <p>Parameters:</p> Name Type Description Default <code>version</code> <code>Union[int, str, datetime]</code> <p>the identifier of the version of the DeltaTable to load</p> required Example <p>Use a version number <pre><code>dt = DeltaTable(\"test_table\")\ndt.load_as_version(1)\n</code></pre></p> <p>Use a datetime object <pre><code>dt.load_as_version(datetime(2023,1,1))\n</code></pre></p> <p>Use a datetime in string format <pre><code>dt.load_as_version(\"2018-01-26T18:30:09Z\")\ndt.load_as_version(\"2018-12-19T16:39:57-08:00\")\ndt.load_as_version(\"2018-01-26T18:30:09.453+00:00\")\n</code></pre></p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.load_version","title":"load_version","text":"<pre><code>load_version(version: int) -&gt; None\n</code></pre> <p>Load a DeltaTable with a specified version.</p> <p>Deprecated</p> <p>Load_version and load_with_datetime have been combined into <code>DeltaTable.load_as_version</code>.</p> <p>Parameters:</p> Name Type Description Default <code>version</code> <code>int</code> <p>the identifier of the version of the DeltaTable to load</p> 
required","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.load_with_datetime","title":"load_with_datetime","text":"<pre><code>load_with_datetime(datetime_string: str) -&gt; None\n</code></pre> <p>Time travel Delta table to the latest version that's created at or before provided <code>datetime_string</code> argument. The <code>datetime_string</code> argument should be an RFC 3339 and ISO 8601 date and time string.</p> <p>Deprecated</p> <p>Load_version and load_with_datetime have been combined into <code>DeltaTable.load_as_version</code>.</p> <p>Parameters:</p> Name Type Description Default <code>datetime_string</code> <code>str</code> <p>the identifier of the datetime point of the DeltaTable to load</p> required Example <pre><code>\"2018-01-26T18:30:09Z\"\n\"2018-12-19T16:39:57-08:00\"\n\"2018-01-26T18:30:09.453+00:00\"\n</code></pre>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.merge","title":"merge","text":"<pre><code>merge(source: Union[pyarrow.Table, pyarrow.RecordBatch, pyarrow.RecordBatchReader, ds.Dataset, pandas.DataFrame], predicate: str, source_alias: Optional[str] = None, target_alias: Optional[str] = None, error_on_type_mismatch: bool = True, writer_properties: Optional[WriterProperties] = None, large_dtypes: bool = True, custom_metadata: Optional[Dict[str, str]] = None) -&gt; TableMerger\n</code></pre> <p>Pass the source data which you want to merge on the target delta table, providing a predicate in SQL query like format. 
You can also specify on what to do when the underlying data types do not match the underlying table.</p> <p>Parameters:</p> Name Type Description Default <code>source</code> <code>Union[Table, RecordBatch, RecordBatchReader, Dataset, DataFrame]</code> <p>source data</p> required <code>predicate</code> <code>str</code> <p>SQL like predicate on how to merge</p> required <code>source_alias</code> <code>Optional[str]</code> <p>Alias for the source table</p> <code>None</code> <code>target_alias</code> <code>Optional[str]</code> <p>Alias for the target table</p> <code>None</code> <code>error_on_type_mismatch</code> <code>bool</code> <p>specify if merge will return error if data types are mismatching :default = True</p> <code>True</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer</p> <code>None</code> <code>large_dtypes</code> <code>bool</code> <p>If True, the data schema is kept in large_dtypes.</p> <code>True</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.metadata","title":"metadata","text":"<pre><code>metadata() -&gt; Metadata\n</code></pre> <p>Get the current metadata of the DeltaTable.</p> <p>Returns:</p> Type Description <code>Metadata</code> <p>the current Metadata registered in the transaction log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.protocol","title":"protocol","text":"<pre><code>protocol() -&gt; ProtocolVersions\n</code></pre> <p>Get the reader and writer protocol versions of the DeltaTable.</p> <p>Returns:</p> Type Description <code>ProtocolVersions</code> <p>the current ProtocolVersions registered in the transaction 
log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.repair","title":"repair","text":"<pre><code>repair(dry_run: bool = False, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Repair the Delta Table by auditing active files that do not exist in the underlying filesystem and removing them. This can be useful when there are accidental deletions or corrupted files.</p> <p>Active files are ones that have an add action in the log, but no corresponding remove action. This operation creates a new FSCK transaction containing a remove action for each of the missing or corrupted files.</p> <p>Parameters:</p> Name Type Description Default <code>dry_run</code> <code>bool</code> <p>when activated, list only the files, otherwise add remove actions to transaction log. Defaults to False.</p> <code>False</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:     The metrics from repair (FSCK) action.</p> Example <p><pre><code>from deltalake import DeltaTable\ndt = DeltaTable('TEST')\ndt.repair(dry_run=False)\n</code></pre> Results in <pre><code>{'dry_run': False, 'files_removed': ['6-0d084325-6885-4847-b008-82c1cf30674c-0.parquet', '5-4fba1d3e-3e20-4de1-933d-a8e13ac59f53-0.parquet']}\n</code></pre></p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.restore","title":"restore","text":"<pre><code>restore(target: Union[int, datetime, str], *, ignore_missing_files: bool = False, protocol_downgrade_allowed: bool = False, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Run the Restore command on the Delta Table: restore table to a given version or datetime.</p> <p>Parameters:</p> Name Type Description Default <code>target</code> <code>Union[int, datetime, str]</code> <p>the version to restore to, given as an int, a date string or a datetime.</p> 
required <code>ignore_missing_files</code> <code>bool</code> <p>whether the operation carries on when some data files are missing.</p> <code>False</code> <code>protocol_downgrade_allowed</code> <code>bool</code> <p>whether to allow the operation when the protocol version has been upgraded.</p> <code>False</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from restore.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.schema","title":"schema","text":"<pre><code>schema() -&gt; DeltaSchema\n</code></pre> <p>Get the current schema of the DeltaTable.</p> <p>Returns:</p> Type Description <code>Schema</code> <p>the current Schema registered in the transaction log</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.to_pandas","title":"to_pandas","text":"<pre><code>to_pandas(partitions: Optional[List[Tuple[str, str, Any]]] = None, columns: Optional[List[str]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, filters: Optional[FilterType] = None) -&gt; pandas.DataFrame\n</code></pre> <p>Build a pandas dataframe using data from the DeltaTable.</p> <p>Parameters:</p> Name Type Description Default <code>partitions</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax</p> <code>None</code> <code>columns</code> <code>Optional[List[str]]</code> <p>The columns to project. This can be a list of column names to include (order and duplicates will be preserved)</p> <code>None</code> <code>filesystem</code> <code>Optional[Union[str, FileSystem]]</code> <p>A concrete implementation of the Pyarrow FileSystem or a fsspec-compatible interface. 
If None, the first file path will be used to determine the right FileSystem</p> <code>None</code> <code>filters</code> <code>Optional[FilterType]</code> <p>A disjunctive normal form (DNF) predicate for filtering rows. If you pass a filter you do not need to pass <code>partitions</code></p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.to_pyarrow_dataset","title":"to_pyarrow_dataset","text":"<pre><code>to_pyarrow_dataset(partitions: Optional[List[Tuple[str, str, Any]]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, parquet_read_options: Optional[ParquetReadOptions] = None) -&gt; pyarrow.dataset.Dataset\n</code></pre> <p>Build a PyArrow Dataset using data from the DeltaTable.</p> <p>Parameters:</p> Name Type Description Default <code>partitions</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax</p> <code>None</code> <code>filesystem</code> <code>Optional[Union[str, FileSystem]]</code> <p>A concrete implementation of the Pyarrow FileSystem or a fsspec-compatible interface. If None, the first file path will be used to determine the right FileSystem</p> <code>None</code> <code>parquet_read_options</code> <code>Optional[ParquetReadOptions]</code> <p>Optional read options for Parquet. 
Use this to handle INT96 to timestamp conversion for edge cases like 0001-01-01 or 9999-12-31</p> <code>None</code> <p>More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.ParquetReadOptions.html</p> <p>Returns:</p> Type Description <code>Dataset</code> <p>the PyArrow dataset in PyArrow</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.to_pyarrow_table","title":"to_pyarrow_table","text":"<pre><code>to_pyarrow_table(partitions: Optional[List[Tuple[str, str, Any]]] = None, columns: Optional[List[str]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, filters: Optional[FilterType] = None) -&gt; pyarrow.Table\n</code></pre> <p>Build a PyArrow Table using data from the DeltaTable.</p> <p>Parameters:</p> Name Type Description Default <code>partitions</code> <code>Optional[List[Tuple[str, str, Any]]]</code> <p>A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax</p> <code>None</code> <code>columns</code> <code>Optional[List[str]]</code> <p>The columns to project. This can be a list of column names to include (order and duplicates will be preserved)</p> <code>None</code> <code>filesystem</code> <code>Optional[Union[str, FileSystem]]</code> <p>A concrete implementation of the Pyarrow FileSystem or a fsspec-compatible interface. If None, the first file path will be used to determine the right FileSystem</p> <code>None</code> <code>filters</code> <code>Optional[FilterType]</code> <p>A disjunctive normal form (DNF) predicate for filtering rows. 
If you pass a filter you do not need to pass <code>partitions</code></p> <code>None</code>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.update","title":"update","text":"<pre><code>update(updates: Optional[Dict[str, str]] = None, new_values: Optional[Dict[str, Union[int, float, str, datetime, bool, List[Any]]]] = None, predicate: Optional[str] = None, writer_properties: Optional[WriterProperties] = None, error_on_type_mismatch: bool = True, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p><code>UPDATE</code> records in the Delta Table that matches an optional predicate. Either updates or new_values needs to be passed for it to execute.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>Optional[Dict[str, str]]</code> <p>a mapping of column name to update SQL expression.</p> <code>None</code> <code>new_values</code> <code>Optional[Dict[str, Union[int, float, str, datetime, bool, List[Any]]]]</code> <p>a mapping of column name to python datatype.</p> <code>None</code> <code>predicate</code> <code>Optional[str]</code> <p>a logical expression.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>error_on_type_mismatch</code> <code>bool</code> <p>specify if update will return error if data types are mismatching :default = True</p> <code>True</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:     the metrics from update</p> Example <p>Update some row values with SQL predicate</p> <p>This is equivalent to <code>UPDATE table SET deleted = true WHERE id = '3'</code> <pre><code>from deltalake import write_deltalake, DeltaTable\nimport pandas as pd\ndf = pd.DataFrame(\n    {\"id\": [\"1\", \"2\", \"3\"],\n    \"deleted\": [False, False, False],\n    
\"price\": [10., 15., 20.]\n    })\nwrite_deltalake(\"tmp\", df)\ndt = DeltaTable(\"tmp\")\ndt.update(predicate=\"id = '3'\", updates = {\"deleted\": 'True'})\n\n{'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...}\n</code></pre></p> <p>Update all row values</p> <p>This is equivalent to <code>UPDATE table SET deleted = true, id = concat(id, '_old')</code>. <pre><code>dt.update(updates = {\"deleted\": 'True', \"id\": \"concat(id, '_old')\"})\n\n{'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 3, 'num_copied_rows': 0, 'execution_time_ms': ..., 'scan_time_ms': ...}\n</code></pre></p> <p>Use Python objects instead of SQL strings</p> <p>Use the <code>new_values</code> parameter instead of the <code>updates</code> parameter. For example, this is equivalent to <code>UPDATE table SET price = 150.10 WHERE id = '1'</code> <pre><code>dt.update(predicate=\"id = '1_old'\", new_values = {\"price\": 150.10})\n\n{'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...}\n</code></pre></p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.update_incremental","title":"update_incremental","text":"<pre><code>update_incremental() -&gt; None\n</code></pre> <p>Updates the DeltaTable to the latest version by incrementally applying newer versions.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.vacuum","title":"vacuum","text":"<pre><code>vacuum(retention_hours: Optional[int] = None, dry_run: bool = True, enforce_retention_duration: bool = True, custom_metadata: Optional[Dict[str, str]] = None) -&gt; List[str]\n</code></pre> <p>Run the Vacuum command on the Delta Table: list and delete files no longer referenced by the Delta table and are older than the retention threshold.</p> <p>Parameters:</p> Name Type Description Default <code>retention_hours</code> 
<code>Optional[int]</code> <p>the retention threshold in hours, if none then the value from <code>configuration.deletedFileRetentionDuration</code> is used or default of 1 week otherwise.</p> <code>None</code> <code>dry_run</code> <code>bool</code> <p>when activated, list only the files, delete otherwise</p> <code>True</code> <code>enforce_retention_duration</code> <code>bool</code> <p>when disabled, accepts retention hours smaller than the value from <code>configuration.deletedFileRetentionDuration</code>.</p> <code>True</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:     the list of files no longer referenced by the Delta Table and are older than the retention threshold.</p>","boost":2},{"location":"api/delta_table/#deltalake.DeltaTable.version","title":"version","text":"<pre><code>version() -&gt; int\n</code></pre> <p>Get the version of the DeltaTable.</p> <p>Returns:</p> Type Description <code>int</code> <p>The current version of the DeltaTable</p>","boost":2},{"location":"api/delta_table/delta_table_alterer/","title":"TableAlterer","text":"","boost":10},{"location":"api/delta_table/delta_table_alterer/#deltalake.table.TableAlterer","title":"deltalake.table.TableAlterer","text":"<pre><code>TableAlterer(table: DeltaTable)\n</code></pre> <p>API for various table alteration commands.</p>","boost":10},{"location":"api/delta_table/delta_table_alterer/#deltalake.table.TableAlterer.add_constraint","title":"add_constraint","text":"<pre><code>add_constraint(constraints: Dict[str, str], custom_metadata: Optional[Dict[str, str]] = None) -&gt; None\n</code></pre> <p>Add constraints to the table. 
Limited to <code>single constraint</code> at once.</p> <p>Parameters:</p> Name Type Description Default <code>constraints</code> <code>Dict[str, str]</code> <p>mapping of constraint name to SQL-expression to evaluate on write</p> required <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Example:     <pre><code>from deltalake import DeltaTable\ndt = DeltaTable(\"test_table_constraints\")\ndt.alter.add_constraint({\n    \"value_gt_5\": \"value &gt; 5\",\n})\n</code></pre></p> <pre><code>**Check configuration**\n```\ndt.metadata().configuration\n{'delta.constraints.value_gt_5': 'value &gt; 5'}\n```\n</code></pre>","boost":10},{"location":"api/delta_table/delta_table_merger/","title":"TableMerger","text":"","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger","title":"deltalake.table.TableMerger","text":"<pre><code>TableMerger(table: DeltaTable, source: pyarrow.RecordBatchReader, predicate: str, source_alias: Optional[str] = None, target_alias: Optional[str] = None, safe_cast: bool = True, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None)\n</code></pre> <p>API for various table <code>MERGE</code> commands.</p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.execute","title":"execute","text":"<pre><code>execute() -&gt; Dict[str, Any]\n</code></pre> <p>Executes <code>MERGE</code> with the previously provided settings in Rust with Apache Datafusion query engine.</p> <p>Returns:</p> Name Type Description <code>Dict</code> <code>Dict[str, Any]</code> <p>metrics</p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_matched_delete","title":"when_matched_delete","text":"<pre><code>when_matched_delete(predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Delete a matched row from the 
table only if the given <code>predicate</code> (if specified) is true for the matched row. If not specified it deletes all matches.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>(str | None, Optional)</code> <p>SQL like predicate on when to delete.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <p>Delete on a predicate</p> <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [2, 3], \"deleted\": [False, True]})\n\n(\n    dt.merge(\n        source=new_data,\n        predicate='target.x = source.x',\n        source_alias='source',\n        target_alias='target')\n    .when_matched_delete(\n        predicate=\"source.deleted = true\")\n    .execute()\n)\n{'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 2, 'num_output_rows': 2, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  4\n1  2  5\n</code></pre> <p>Delete all records that were matched <pre><code>dt = DeltaTable(\"tmp\")\n(\n    dt.merge(\n        source=new_data,\n        predicate='target.x = source.x',\n        source_alias='source',\n        target_alias='target')\n    .when_matched_delete()\n    .execute()\n)\n{'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 1, 'num_output_rows': 1, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas()\n   x  y\n0  1  
4\n</code></pre></p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_matched_update","title":"when_matched_update","text":"<pre><code>when_matched_update(updates: Dict[str, str], predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Update a matched table row based on the rules defined by <code>updates</code>. If a <code>predicate</code> is specified, then it must evaluate to true for the row to be updated.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>Dict[str, str]</code> <p>a mapping of column name to update SQL expression.</p> required <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to update.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [1], \"y\": [7]})\n\n(\n     dt.merge(\n         source=new_data,\n         predicate=\"target.x = source.x\",\n         source_alias=\"source\",\n         target_alias=\"target\")\n     .when_matched_update(updates={\"x\": \"source.x\", \"y\": \"source.y\"})\n     .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas()\n   x  y\n0  1  7\n1  2  5\n2  3  6\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_matched_update_all","title":"when_matched_update_all","text":"<pre><code>when_matched_update_all(predicate: Optional[str] = None) -&gt; 
TableMerger\n</code></pre> <p>Updating all source fields to target fields, source and target are required to have the same field names. If a <code>predicate</code> is specified, then it must evaluate to true for the row to be updated.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to update all columns.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [1], \"y\": [7]})\n\n(\n    dt.merge(\n        source=new_data,\n        predicate=\"target.x = source.x\",\n        source_alias=\"source\",\n        target_alias=\"target\")\n    .when_matched_update_all()\n    .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas()\n   x  y\n0  1  7\n1  2  5\n2  3  6\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_by_source_delete","title":"when_not_matched_by_source_delete","text":"<pre><code>when_not_matched_by_source_delete(predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Delete a target row that has no matches in the source from the table only if the given <code>predicate</code> (if specified) is true for the target row.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to delete when not matched by source.</p> <code>None</code> 
<p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_by_source_update","title":"when_not_matched_by_source_update","text":"<pre><code>when_not_matched_by_source_update(updates: Dict[str, str], predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Update a target row that has no matches in the source based on the rules defined by <code>updates</code>. If a <code>predicate</code> is specified, then it must evaluate to true for the row to be updated.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>Dict[str, str]</code> <p>a mapping of column name to update SQL expression.</p> required <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to update.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [2, 3, 4]})\n\n(\n   dt.merge(\n       source=new_data,\n       predicate='target.x = source.x',\n       source_alias='source',\n       target_alias='target')\n   .when_not_matched_by_source_update(\n       predicate = \"y &gt; 3\",\n       updates = {\"y\": \"0\"})\n   .execute()\n)\n{'num_source_rows': 3, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  0\n1  2  5\n2  3  
6\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_insert","title":"when_not_matched_insert","text":"<pre><code>when_not_matched_insert(updates: Dict[str, str], predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Insert a new row to the target table based on the rules defined by <code>updates</code>. If a <code>predicate</code> is specified, then it must evaluate to true for the new row to be inserted.</p> <p>Parameters:</p> Name Type Description Default <code>updates</code> <code>dict</code> <p>a mapping of column name to insert SQL expression.</p> required <code>predicate</code> <code>(str | None, Optional)</code> <p>SQL like predicate on when to insert.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [4], \"y\": [7]})\n\n(\n    dt.merge(\n        source=new_data,\n        predicate=\"target.x = source.x\",\n        source_alias=\"source\",\n        target_alias=\"target\",)\n    .when_not_matched_insert(\n        updates={\n            \"x\": \"source.x\",\n            \"y\": \"source.y\",\n        })\n    .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 1, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 3, 'num_output_rows': 4, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  4\n1  2  5\n2  3  6\n3  4  
7\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.when_not_matched_insert_all","title":"when_not_matched_insert_all","text":"<pre><code>when_not_matched_insert_all(predicate: Optional[str] = None) -&gt; TableMerger\n</code></pre> <p>Insert a new row to the target table, updating all source fields to target fields. Source and target are required to have the same field names. If a <code>predicate</code> is specified, then it must evaluate to true for the new row to be inserted.</p> <p>Parameters:</p> Name Type Description Default <code>predicate</code> <code>Optional[str]</code> <p>SQL like predicate on when to insert.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p> Example <pre><code>from deltalake import DeltaTable, write_deltalake\nimport pyarrow as pa\n\ndata = pa.table({\"x\": [1, 2, 3], \"y\": [4, 5, 6]})\nwrite_deltalake(\"tmp\", data)\ndt = DeltaTable(\"tmp\")\nnew_data = pa.table({\"x\": [4], \"y\": [7]})\n\n(\n   dt.merge(\n       source=new_data,\n       predicate='target.x = source.x',\n       source_alias='source',\n       target_alias='target')\n   .when_not_matched_insert_all()\n   .execute()\n)\n{'num_source_rows': 1, 'num_target_rows_inserted': 1, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 3, 'num_output_rows': 4, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}\n\ndt.to_pandas().sort_values(\"x\", ignore_index=True)\n   x  y\n0  1  4\n1  2  5\n2  3  6\n3  4  7\n</code></pre>","boost":2},{"location":"api/delta_table/delta_table_merger/#deltalake.table.TableMerger.with_writer_properties","title":"with_writer_properties","text":"<pre><code>with_writer_properties(data_page_size_limit: Optional[int] = None, dictionary_page_size_limit: Optional[int] = None, data_page_row_count_limit: 
Optional[int] = None, write_batch_size: Optional[int] = None, max_row_group_size: Optional[int] = None) -&gt; TableMerger\n</code></pre> <p>Deprecated</p> <p>Use <code>.merge(writer_properties = WriterProperties())</code> instead</p> <p>Pass writer properties to the Rust parquet writer, see options https://arrow.apache.org/rust/parquet/file/properties/struct.WriterProperties.html:</p> <p>Parameters:</p> Name Type Description Default <code>data_page_size_limit</code> <code>Optional[int]</code> <p>Limit DataPage size to this in bytes.</p> <code>None</code> <code>dictionary_page_size_limit</code> <code>Optional[int]</code> <p>Limit the size of each DataPage to store dicts to this amount in bytes.</p> <code>None</code> <code>data_page_row_count_limit</code> <code>Optional[int]</code> <p>Limit the number of rows in each DataPage.</p> <code>None</code> <code>write_batch_size</code> <code>Optional[int]</code> <p>Splits internally to smaller batch size.</p> <code>None</code> <code>max_row_group_size</code> <code>Optional[int]</code> <p>Max number of rows in row group.</p> <code>None</code> <p>Returns:</p> Name Type Description <code>TableMerger</code> <code>TableMerger</code> <p>TableMerger Object</p>","boost":2},{"location":"api/delta_table/delta_table_optimizer/","title":"TableOptimizer","text":"","boost":10},{"location":"api/delta_table/delta_table_optimizer/#deltalake.table.TableOptimizer","title":"deltalake.table.TableOptimizer","text":"<pre><code>TableOptimizer(table: DeltaTable)\n</code></pre> <p>API for various table optimization commands.</p>","boost":10},{"location":"api/delta_table/delta_table_optimizer/#deltalake.table.TableOptimizer.compact","title":"compact","text":"<pre><code>compact(partition_filters: Optional[FilterType] = None, target_size: Optional[int] = None, max_concurrent_tasks: Optional[int] = None, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, 
str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Compacts small files to reduce the total number of files in the table.</p> <p>This operation is idempotent; if run twice on the same table (assuming it has not been updated) it will do nothing the second time.</p> <p>If this operation happens concurrently with any operations other than append, it will fail.</p> <p>Parameters:</p> Name Type Description Default <code>partition_filters</code> <code>Optional[FilterType]</code> <p>the partition filters that will be used for getting the matched files</p> <code>None</code> <code>target_size</code> <code>Optional[int]</code> <p>desired file size after bin-packing files, in bytes. If not             provided, will attempt to read the table configuration value <code>delta.targetFileSize</code>.             If that value isn't set, will use default value of 256MB.</p> <code>None</code> <code>max_concurrent_tasks</code> <code>Optional[int]</code> <p>the maximum number of concurrent tasks to use for                     file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction                     faster, but will also use more memory.</p> <code>None</code> <code>min_commit_interval</code> <code>Optional[Union[int, timedelta]]</code> <p>minimum interval in seconds or as timedeltas before a new commit is                     created. Interval is useful for long running executions. 
Set to 0 or timedelta(0), if you                     want a commit per partition.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from optimize</p> Example <p>Use a timedelta object to specify the seconds, minutes or hours of the interval. <pre><code>from deltalake import DeltaTable, write_deltalake\nfrom datetime import timedelta\nimport pyarrow as pa\n\nwrite_deltalake(\"tmp\", pa.table({\"x\": [1], \"y\": [4]}))\nwrite_deltalake(\"tmp\", pa.table({\"x\": [2], \"y\": [5]}), mode=\"append\")\n\ndt = DeltaTable(\"tmp\")\ntime_delta = timedelta(minutes=10)\ndt.optimize.compact(min_commit_interval=time_delta)\n{'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 1, 'numBatches': 2, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True}\n</code></pre></p>","boost":10},{"location":"api/delta_table/delta_table_optimizer/#deltalake.table.TableOptimizer.z_order","title":"z_order","text":"<pre><code>z_order(columns: Iterable[str], partition_filters: Optional[FilterType] = None, target_size: Optional[int] = None, max_concurrent_tasks: Optional[int] = None, max_spill_size: int = 20 * 1024 * 1024 * 1024, min_commit_interval: Optional[Union[int, timedelta]] = None, writer_properties: Optional[WriterProperties] = None, custom_metadata: Optional[Dict[str, str]] = None) -&gt; Dict[str, Any]\n</code></pre> <p>Reorders the data using a Z-order curve to improve data skipping.</p> <p>This also performs compaction, so the same parameters as compact() apply.</p> <p>Parameters:</p> Name Type Description Default <code>columns</code> <code>Iterable[str]</code> 
<p>the columns to use for Z-ordering. There must be at least one column.         partition_filters: the partition filters that will be used for getting the matched files</p> required <code>target_size</code> <code>Optional[int]</code> <p>desired file size after bin-packing files, in bytes. If not             provided, will attempt to read the table configuration value <code>delta.targetFileSize</code>.             If that value isn't set, will use default value of 256MB.</p> <code>None</code> <code>max_concurrent_tasks</code> <code>Optional[int]</code> <p>the maximum number of concurrent tasks to use for                     file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction                     faster, but will also use more memory.</p> <code>None</code> <code>max_spill_size</code> <code>int</code> <p>the maximum number of bytes to spill to disk. Defaults to 20GB.</p> <code>20 * 1024 * 1024 * 1024</code> <code>min_commit_interval</code> <code>Optional[Union[int, timedelta]]</code> <p>minimum interval in seconds or as timedeltas before a new commit is                     created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you                     want a commit per partition.</p> <code>None</code> <code>writer_properties</code> <code>Optional[WriterProperties]</code> <p>Pass writer properties to the Rust parquet writer.</p> <code>None</code> <code>custom_metadata</code> <code>Optional[Dict[str, str]]</code> <p>custom metadata that will be added to the transaction commit.</p> <code>None</code> <p>Returns:</p> Type Description <code>Dict[str, Any]</code> <p>the metrics from optimize</p> Example <p>Use a timedelta object to specify the seconds, minutes or hours of the interval. 
<pre><code>from deltalake import DeltaTable, write_deltalake\nfrom datetime import timedelta\nimport pyarrow as pa\n\nwrite_deltalake(\"tmp\", pa.table({\"x\": [1], \"y\": [4]}))\nwrite_deltalake(\"tmp\", pa.table({\"x\": [2], \"y\": [5]}), mode=\"append\")\n\ndt = DeltaTable(\"tmp\")\ntime_delta = timedelta(minutes=10)\ndt.optimize.z_order([\"x\"], min_commit_interval=time_delta)\n{'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 0, 'numBatches': 1, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True}\n</code></pre></p>","boost":10},{"location":"api/delta_table/metadata/","title":"Metadata","text":"","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata","title":"deltalake.Metadata  <code>dataclass</code>","text":"<pre><code>Metadata(table: RawDeltaTable)\n</code></pre> <p>Create a Metadata instance.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.configuration","title":"configuration  <code>property</code>","text":"<pre><code>configuration: Dict[str, str]\n</code></pre> <p>Return the DeltaTable properties.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.created_time","title":"created_time  <code>property</code>","text":"<pre><code>created_time: int\n</code></pre> <p>Return The time when this metadata action is created, in milliseconds since the Unix epoch of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.description","title":"description  <code>property</code>","text":"<pre><code>description: str\n</code></pre> <p>Return the user-provided description of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.id","title":"id  <code>property</code>","text":"<pre><code>id: int\n</code></pre> <p>Return the unique identifier of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.name","title":"name  
<code>property</code>","text":"<pre><code>name: str\n</code></pre> <p>Return the user-provided identifier of the DeltaTable.</p>","boost":2},{"location":"api/delta_table/metadata/#deltalake.Metadata.partition_columns","title":"partition_columns  <code>property</code>","text":"<pre><code>partition_columns: List[str]\n</code></pre> <p>Return an array containing the names of the partitioned columns of the DeltaTable.</p>","boost":2},{"location":"how-delta-lake-works/architecture-of-delta-table/","title":"Architecture of a Delta Lake table","text":"<p>A Delta table consists of Parquet files that contain data and a transaction log that stores metadata about the transactions.</p> <p></p> <p>Let's create a Delta table, perform some operations, and inspect the files that are created.</p>"},{"location":"how-delta-lake-works/architecture-of-delta-table/#delta-lake-transaction-examples","title":"Delta Lake transaction examples","text":"<p>Start by creating a pandas DataFrame and writing it out to a Delta table.</p> <pre><code>import pandas as pd\nfrom deltalake import DeltaTable, write_deltalake\n\ndf = pd.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>Now inspect the files created in storage:</p> <pre><code>tmp/some-table\n\u251c\u2500\u2500 0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\n\u2514\u2500\u2500 _delta_log\n    \u2514\u2500\u2500 00000000000000000000.json\n</code></pre> <p>The Parquet file stores the data that was written.  The <code>_delta_log</code> directory stores metadata about the transactions.  
Let's inspect the <code>_delta_log/00000000000000000000.json</code> file.</p> <pre><code>{\n  \"protocol\": {\n    \"minReaderVersion\": 1,\n    \"minWriterVersion\": 1\n  }\n}\n{\n  \"metaData\": {\n    \"id\": \"b96ea1a2-1830-4da2-8827-5334cc6104ed\",\n    \"name\": null,\n    \"description\": null,\n    \"format\": {\n      \"provider\": \"parquet\",\n      \"options\": {}\n    },\n    \"schemaString\": \"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"num\\\",\\\"type\\\":\\\"long\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"letter\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\n    \"partitionColumns\": [],\n    \"createdTime\": 1701740315599,\n    \"configuration\": {}\n  }\n}\n{\n  \"add\": {\n    \"path\": \"0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\",\n    \"size\": 2208,\n    \"partitionValues\": {},\n    \"modificationTime\": 1701740315597,\n    \"dataChange\": true,\n    \"stats\": \"{\\\"numRecords\\\": 3, \\\"minValues\\\": {\\\"num\\\": 1, \\\"letter\\\": \\\"a\\\"}, \\\"maxValues\\\": {\\\"num\\\": 3, \\\"letter\\\": \\\"c\\\"}, \\\"nullCount\\\": {\\\"num\\\": 0, \\\"letter\\\": 0}}\"\n  }\n}\n{\n  \"commitInfo\": {\n    \"timestamp\": 1701740315602,\n    \"operation\": \"CREATE TABLE\",\n    \"operationParameters\": {\n      \"location\": \"file:///Users/matthew.powers/Documents/code/delta/delta-examples/notebooks/python-deltalake/tmp/some-table\",\n      \"metadata\": 
\"{\\\"configuration\\\":{},\\\"created_time\\\":1701740315599,\\\"description\\\":null,\\\"format\\\":{\\\"options\\\":{},\\\"provider\\\":\\\"parquet\\\"},\\\"id\\\":\\\"b96ea1a2-1830-4da2-8827-5334cc6104ed\\\",\\\"name\\\":null,\\\"partition_columns\\\":[],\\\"schema\\\":{\\\"fields\\\":[{\\\"metadata\\\":{},\\\"name\\\":\\\"num\\\",\\\"nullable\\\":true,\\\"type\\\":\\\"long\\\"},{\\\"metadata\\\":{},\\\"name\\\":\\\"letter\\\",\\\"nullable\\\":true,\\\"type\\\":\\\"string\\\"}],\\\"type\\\":\\\"struct\\\"}}\",\n      \"protocol\": \"{\\\"minReaderVersion\\\":1,\\\"minWriterVersion\\\":1}\",\n      \"mode\": \"ErrorIfExists\"\n    },\n    \"clientVersion\": \"delta-rs.0.17.0\"\n  }\n}\n</code></pre> <p>The tranasction log file contains the following information:</p> <ul> <li>the files added to the Delta table</li> <li>schema of the files</li> <li>column level metadata including the min/max value for each file</li> </ul> <p>Create another pandas DataFrame and append it to the Delta table to see how this transaction is recorded.</p> <pre><code>df = pd.DataFrame({\"num\": [8, 9], \"letter\": [\"dd\", \"ee\"]})\nwrite_deltalake(f\"{cwd}/tmp/delta-table\", df, mode=\"append\")\n</code></pre> <p>Here are the files in storage:</p> <pre><code>tmp/some-table\n\u251c\u2500\u2500 0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\n\u251c\u2500\u2500 1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\n\u2514\u2500\u2500 _delta_log\n    \u251c\u2500\u2500 00000000000000000000.json\n    \u2514\u2500\u2500 00000000000000000001.json\n</code></pre> <p>Here are the contents of the <code>_delta_log/00000000000000000001.json</code> file:</p> <pre><code>{\n  \"add\": {\n    \"path\": \"1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\",\n    \"size\": 2204,\n    \"partitionValues\": {},\n    \"modificationTime\": 1701740386169,\n    \"dataChange\": true,\n    \"stats\": \"{\\\"numRecords\\\": 2, \\\"minValues\\\": {\\\"num\\\": 8, \\\"letter\\\": \\\"dd\\\"}, \\\"maxValues\\\": 
{\\\"num\\\": 9, \\\"letter\\\": \\\"ee\\\"}, \\\"nullCount\\\": {\\\"num\\\": 0, \\\"letter\\\": 0}}\"\n  }\n}\n{\n  \"commitInfo\": {\n    \"timestamp\": 1701740386169,\n    \"operation\": \"WRITE\",\n    \"operationParameters\": {\n      \"partitionBy\": \"[]\",\n      \"mode\": \"Append\"\n    },\n    \"clientVersion\": \"delta-rs.0.17.0\"\n  }\n}\n</code></pre> <p>The transaction log records that the second file has been persisted in the Delta table.</p> <p>Now create a third pandas DataFrame and overwrite the Delta table with the new data.</p> <pre><code>df = pd.DataFrame({\"num\": [11, 22], \"letter\": [\"aa\", \"bb\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"overwrite\")\n</code></pre> <p>Here are the files in storage:</p> <pre><code>tmp/some-table\n\u251c\u2500\u2500 0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\n\u251c\u2500\u2500 1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\n\u251c\u2500\u2500 2-95ef2108-480c-4b89-96f0-ff9185dab9ad-0.parquet\n\u2514\u2500\u2500 _delta_log\n    \u251c\u2500\u2500 00000000000000000000.json\n    \u251c\u2500\u2500 00000000000000000001.json\n    \u2514\u2500\u2500 00000000000000000002.json\n</code></pre> <p>Here are the contents of the <code>_delta_log/00000000000000000002.json</code> file:</p> <pre><code>{\n  \"add\": {\n    \"path\": \"2-95ef2108-480c-4b89-96f0-ff9185dab9ad-0.parquet\",\n    \"size\": 2204,\n    \"partitionValues\": {},\n    \"modificationTime\": 1701740465102,\n    \"dataChange\": true,\n    \"stats\": \"{\\\"numRecords\\\": 2, \\\"minValues\\\": {\\\"num\\\": 11, \\\"letter\\\": \\\"aa\\\"}, \\\"maxValues\\\": {\\\"num\\\": 22, \\\"letter\\\": \\\"bb\\\"}, \\\"nullCount\\\": {\\\"num\\\": 0, \\\"letter\\\": 0}}\"\n  }\n}\n{\n  \"remove\": {\n    \"path\": \"0-62dffa23-bbe1-4496-8fb5-bff6724dc677-0.parquet\",\n    \"deletionTimestamp\": 1701740465102,\n    \"dataChange\": true,\n    \"extendedFileMetadata\": false,\n    \"partitionValues\": {},\n    \"size\": 2208\n  }\n}\n{\n  \"remove\": {\n    
\"path\": \"1-57abb6fb-2249-43ba-a7be-cf09bcc230de-0.parquet\",\n    \"deletionTimestamp\": 1701740465102,\n    \"dataChange\": true,\n    \"extendedFileMetadata\": false,\n    \"partitionValues\": {},\n    \"size\": 2204\n  }\n}\n{\n  \"commitInfo\": {\n    \"timestamp\": 1701740465102,\n    \"operation\": \"WRITE\",\n    \"operationParameters\": {\n      \"mode\": \"Overwrite\",\n      \"partitionBy\": \"[]\"\n    },\n    \"clientVersion\": \"delta-rs.0.17.0\"\n  }\n}\n</code></pre> <p>This transaction adds a data file and marks the two existing data files for removal.  Marking a file for removal in the transaction log is known as \"tombstoning the file\" or a \"logical delete\".  This is different from a \"physical delete\" which actually removes the data file from storage.</p>"},{"location":"how-delta-lake-works/architecture-of-delta-table/#how-delta-table-operations-differ-from-data-lakes","title":"How Delta table operations differ from data lakes","text":"<p>Data lakes consist of data files persisted in storage.  They don't have a transaction log that retains metadata about the transactions.</p> <p>Data lakes perform transactions differently than Delta tables.</p> <p>When you perform an overwrite transaction with a Delta table, you logically delete the existing data without physically removing it.</p> <p>Data lakes don't support logical deletes, so you have to physically delete the data from storage.</p> <p>Logical data operations are safer because they can be rolled back if they don't complete successfully.  
Physically removing data from storage can be dangerous, especially if it's before a transaction is complete.</p> <p>We're now ready to look into Delta Lake ACID transactions in more detail.</p>"},{"location":"integrations/delta-lake-arrow/","title":"Delta Lake Arrow Integrations","text":"<p>Delta Lake tables can be exposed as Arrow tables and Arrow datasets, which allows for interoperability with a variety of query engines.</p> <p>This page shows you how to convert Delta tables to Arrow data structures and teaches you the difference between Arrow tables and Arrow datasets.  Tables are \"eager\" and datasets are \"lazy\", which has important performance implications, keep reading to learn more!</p>"},{"location":"integrations/delta-lake-arrow/#delta-lake-to-arrow-dataset","title":"Delta Lake to Arrow Dataset","text":"<p>Delta tables can easily be exposed as Arrow datasets.  This makes it easy for any query engine that can read Arrow datasets to read a Delta table.</p> <p>Let's take a look at the h2o groupby dataset that contains 9 columns of data.  
Here are three representative rows of data:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>Here's how to expose the Delta table as a PyArrow dataset and run a query with DuckDB:</p> <pre><code>import duckdb\nfrom deltalake import DeltaTable\n\ntable = DeltaTable(\"delta/G1_1e9_1e2_0_0\")\ndataset = table.to_pyarrow_dataset()\nquack = duckdb.arrow(dataset)\nquack.filter(\"id1 = 'id016' and v2 &gt; 10\")\n</code></pre> <p>Here's the result:</p> <pre><code>\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502   id1   \u2502   id2   \u2502     id3      \u2502  id4  \u2502  id5  \u2502   id6   \u2502  v1   \u2502  v2   \u2502    v3     \u2502\n\u2502 varchar \u2502 varchar \u2502   varchar    \u2502 int32 \u2502 int32 \u2502  int32  \u2502 int32 \u2502 int32 \u2502  double   
\u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 id016   \u2502 id054   \u2502 id0002309114 \u2502    62 \u2502    95 \u2502 7180859 \u2502     4 \u2502    13 \u2502  7.750173 \u2502\n\u2502 id016   \u2502 id044   \u2502 id0003968533 \u2502    63 \u2502    98 \u2502 2356363 \u2502     4 \u2502    14 \u2502  3.942417 \u2502\n\u2502 id016   \u2502 id034   \u2502 id0001082839 \u2502    58 \u2502    73 \u2502 8039808 \u2502     5 \u2502    12 \u2502 76.820135 \u2502\n\u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524\n\u2502 ? 
rows (&gt;9999 rows, 3 shown)                                                 9 columns \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n</code></pre> <p>Arrow datasets allow for the predicates to get pushed down to the query engine, so the query is executed quickly.</p>"},{"location":"integrations/delta-lake-arrow/#delta-lake-to-arrow-table","title":"Delta Lake to Arrow Table","text":"<p>You can also run the same query with DuckDB on an Arrow table:</p> <pre><code>quack = duckdb.arrow(table.to_pyarrow_table())\nquack.filter(\"id1 = 'id016' and v2 &gt; 10\")\n</code></pre> <p>This returns the same result, but it runs slower.</p>"},{"location":"integrations/delta-lake-arrow/#difference-between-arrow-dataset-and-arrow-table","title":"Difference between Arrow Dataset and Arrow Table","text":"<p>Arrow Datasets are lazy and allow for full predicate pushdown unlike Arrow tables which are eagerly loaded into memory.</p> <p>The previous DuckDB queries were run on a 1 billion row dataset that's roughly 50 GB when stored as an uncompressed CSV file.  
Here are the runtimes when the data is stored in a Delta table and the queries are executed on a 2021 Macbook M1 with 64 GB of RAM:</p> <ul> <li>Arrow table: 17.1 seconds</li> <li>Arrow dataset: 0.01 seconds</li> </ul> <p>The query runs much faster on an Arrow dataset because the predicates can be pushed down to the query engine and lots of data can be skipped.</p> <p>Arrow tables are eagerly materialized in memory and don't allow for the same amount of data skipping.</p>"},{"location":"integrations/delta-lake-arrow/#multiple-query-engines-can-query-arrow-datasets","title":"Multiple query engines can query Arrow Datasets","text":"<p>Other query engines like DataFusion can also query Arrow datasets, see the following example:</p> <pre><code>from datafusion import SessionContext\n\nctx = SessionContext()\nctx.register_dataset(\"my_dataset\", table.to_pyarrow_dataset())\nctx.sql(\"select * from my_dataset where v2 &gt; 5\")\n</code></pre> <p>Here's the result:</p> <pre><code>+-------+-------+--------------+-----+-----+--------+----+----+-----------+\n| id1   | id2   | id3          | id4 | id5 | id6    | v1 | v2 | v3        |\n+-------+-------+--------------+-----+-----+--------+----+----+-----------+\n| id082 | id049 | id0000022715 | 97  | 55  | 756924 | 2  | 11 | 74.161136 |\n| id053 | id052 | id0000113549 | 19  | 56  | 139048 | 1  | 10 | 95.178444 |\n| id090 | id043 | id0000637409 | 94  | 50  | 12448  | 3  | 12 | 60.21896  |\n+-------+-------+--------------+-----+-----+--------+----+----+-----------+\n</code></pre> <p>Any query engine that's capable of reading an Arrow table/dataset can read a Delta table.</p>"},{"location":"integrations/delta-lake-arrow/#conclusion","title":"Conclusion","text":"<p>Delta tables can easily be exposed as Arrow tables/datasets.</p> <p>Therefore any query engine that can read an Arrow table/dataset can also read a Delta table.</p> <p>Arrow datasets allow for more predicates to be pushed down to the query engine, so they can perform 
better than Arrow tables.</p>"},{"location":"integrations/delta-lake-datafusion/","title":"Using Delta Lake with DataFusion","text":"<p>This page explains how to use Delta Lake with DataFusion.</p> <p>Delta Lake offers DataFusion users better performance and more features compared to other formats like CSV or Parquet.</p> <p>Delta Lake works well with the DataFusion Rust API and the DataFusion Python API.  It's a great option for all DataFusion users.</p> <p>Delta Lake also depends on DataFusion to implement SQL-related functionality under the hood.  We will also discuss this dependency at the end of this guide in case you're interested in learning more about the symbiotic relationship between the two libraries.</p>"},{"location":"integrations/delta-lake-datafusion/#delta-lake-performance-benefits-for-datafusion-users","title":"Delta Lake performance benefits for DataFusion users","text":"<p>Let's run some DataFusion queries on a Parquet file and a Delta table with the same data to learn more about the performance benefits of Delta Lake.</p> <p>Suppose you have the following dataset with 1 billion rows and 9 columns.  
Here are the first three rows of data:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>Here's how to register a Delta Lake table as a PyArrow dataset:</p> <pre><code>from datafusion import SessionContext\nfrom deltalake import DeltaTable\n\nctx = SessionContext()\ntable = DeltaTable(\"G1_1e9_1e2_0_0\")\nctx.register_dataset(\"my_delta_table\", table.to_pyarrow_dataset())\n</code></pre> <p>Now query the table:</p> <pre><code>ctx.sql(\"select id1, sum(v1) as v1 from my_delta_table where id1='id096' group by id1\")\n</code></pre> <p>That query takes 2.8 seconds to execute.</p> <p>Let's register the same dataset as a Parquet table, run the same query, and compare the runtime difference.</p> <p>Register the Parquet table and run the query:</p> <pre><code>path = \"G1_1e9_1e2_0_0.parquet\"\nctx.register_parquet(\"my_parquet_table\", path)\nctx.sql(\"select id1, sum(v1) as v1 from my_parquet_table where id1='id096' group by id1\")\n</code></pre> <p>This query takes 5.3 seconds to run.</p> <p>Parquet stores data in row groups and DataFusion can intelligently skip row groups that don't contain relevant data, so the query is faster than a file format like CSV which doesn't support row group skipping.</p> <p>Delta Lake stores file-level metadata information in the transaction log, so it can skip entire files when queries are executed.  Delta Lake can skip entire files and then skip row groups within the individual files.  
This makes Delta Lake even faster than Parquet files, especially for larger datasets spread across many files.</p>"},{"location":"integrations/delta-lake-datafusion/#delta-lake-features-for-datafusion-users","title":"Delta Lake features for DataFusion users","text":"<p>Delta Lake also provides other features that are useful for DataFusion users like ACID transactions, concurrency protection, time travel, versioned data, and more.</p>"},{"location":"integrations/delta-lake-datafusion/#why-delta-lake-depends-on-datafusion","title":"Why Delta Lake depends on DataFusion","text":"<p>Delta Lake depends on DataFusion to provide some end-user features.</p> <p>DataFusion is useful in providing SQL-related Delta Lake features. Some examples:</p> <ul> <li>Update and merge are written in terms of SQL expressions.</li> <li>Invariants and constraints are written in terms of SQL expressions.</li> </ul> <p>Anytime we have to evaluate SQL, we need some sort of SQL engine.  We use DataFusion for that.</p>"},{"location":"integrations/delta-lake-datafusion/#conclusion","title":"Conclusion","text":"<p>Delta Lake is a great file format for DataFusion users.</p> <p>Delta Lake also uses DataFusion to provide some end-user features.</p> <p>DataFusion and Delta Lake have a wonderful symbiotic relationship and play very nicely with each other.</p> <p>See this guide for more information on Delta Lake and PyArrow and why PyArrow Datasets are often a better option than PyArrow tables.</p>"},{"location":"integrations/delta-lake-pandas/","title":"Using Delta Lake with pandas","text":"<p>Delta Lake is a great storage system for pandas analyses.  
This page shows how easy it is to use Delta Lake with pandas, the unique features Delta Lake offers pandas users, and how Delta Lake can make your pandas analyses run faster.</p> <p>Delta Lake is very easy to install for pandas analyses, just run <code>pip install deltalake</code>.</p> <p>Delta Lake allows for performance optimizations, so pandas queries can run much faster than the same queries run on data stored in CSV or Parquet.  See the following chart for the query runtime for a Delta table compared with CSV/Parquet.</p> <p></p> <p>Z Ordered Delta tables run this query much faster than when the data is stored in Parquet or CSV.  Let's dive in deeper and see how Delta Lake makes pandas faster.</p>"},{"location":"integrations/delta-lake-pandas/#delta-lake-makes-pandas-queries-run-faster","title":"Delta Lake makes pandas queries run faster","text":"<p>There are a few reasons Delta Lake can make pandas queries run faster:</p> <ol> <li>column pruning: only grabbing the columns relevant for a query</li> <li>file skipping: only reading files with data for the query</li> <li>row group skipping: only reading row groups with data for the query</li> <li>Z ordering data: colocating similar data in the same files, so file skipping is more effective</li> </ol> <p>Reading less data (fewer columns and/or fewer rows) is how Delta Lake makes pandas queries run faster.</p> <p>Parquet allows for column pruning and row group skipping, but doesn't support file-level skipping or Z Ordering.  
CSV doesn't support any of these performance optimizations.</p> <p>Let's take a look at a sample dataset and run a query to see the performance enhancements offered by Delta Lake.</p> <p>Suppose you have a 1 billion row dataset with 9 columns, here are the first three rows of the dataset:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>The dataset is roughly 50 GB when stored as an uncompressed CSV file.  Let's run some queries on a 2021 Macbook M1 with 64 GB of RAM.</p> <p>Start by running the query on an uncompressed CSV file:</p> <pre><code>(\n    pd.read_csv(f\"{Path.home()}/data/G1_1e9_1e2_0_0.csv\", usecols=[\"id1\", \"id2\", \"v1\"])\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query takes 234 seconds to execute.  It runs out of memory if the <code>usecols</code> parameter is not set.</p> <p>Now let's convert the CSV dataset to Parquet and run the same query on the data stored in a Parquet file.</p> <pre><code>(\n    pd.read_parquet(\n        f\"{Path.home()}/data/G1_1e9_1e2_0_0.parquet\", columns=[\"id1\", \"id2\", \"v1\"]\n    )\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query takes 118 seconds to execute.</p> <p>Parquet stores data in row groups and allows for skipping when the <code>filters</code> predicates are set.  
Run the Parquet query again with row group skipping enabled:</p> <pre><code>(\n    pd.read_parquet(\n        f\"{Path.home()}/data/G1_1e9_1e2_0_0.parquet\",\n        columns=[\"id1\", \"id2\", \"v1\"],\n        filters=[(\"id1\", \"==\", \"id016\")],\n    )\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query runs in 19 seconds.  Lots of row groups can be skipped for this particular query.</p> <p>Now let's run the same query on a Delta table to see the out-of-the box performance:</p> <pre><code>(\n    DeltaTable(f\"{Path.home()}/data/deltalake_baseline_G1_1e9_1e2_0_0\", version=0)\n    .to_pandas(filters=[(\"id1\", \"==\", \"id016\")], columns=[\"id1\", \"id2\", \"v1\"])\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>This query runs in 8 seconds, which is a significant performance enhancement.</p> <p>Now let's Z Order the Delta table by <code>id1</code> which will make the data skipping even better.  
Run the query again on the Z Ordered Delta table:</p> <pre><code>(\n    DeltaTable(f\"{Path.home()}/data/deltalake_baseline_G1_1e9_1e2_0_0\", version=1)\n    .to_pandas(filters=[(\"id1\", \"==\", \"id016\")], columns=[\"id1\", \"id2\", \"v1\"])\n    .query(\"id1 == 'id016'\")\n    .groupby(\"id2\")\n    .agg({\"v1\": \"sum\"})\n)\n</code></pre> <p>The query now executes in 2.4 seconds.</p> <p>Delta tables can make certain pandas queries run much faster.</p>"},{"location":"integrations/delta-lake-pandas/#delta-lake-lets-pandas-users-time-travel","title":"Delta Lake lets pandas users time travel","text":"<p>Start by creating a Delta table:</p> <pre><code>from deltalake import write_deltalake, DeltaTable\n\ndf = pd.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>Here are the contents of the Delta table (version 0 of the Delta table):</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre> <p>Now append two rows to the Delta table:</p> <pre><code>df = pd.DataFrame({\"num\": [8, 9], \"letter\": [\"dd\", \"ee\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"append\")\n</code></pre> <p>Here are the contents after the append operation (version 1 of the Delta table):</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre> <p>Now perform an overwrite transaction:</p> <pre><code>df = pd.DataFrame({\"num\": [11, 22], \"letter\": [\"aa\", \"bb\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"overwrite\")\n</code></pre> <p>Here are the contents after the overwrite operation (version 2 of the Delta table):</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|    11 | aa      
 |\n|    22 | bb       |\n+-------+----------+\n</code></pre> <p>Read in the Delta table and it will grab the latest version by default:</p> <pre><code>DeltaTable(\"tmp/some-table\").to_pandas()\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|    11 | aa       |\n|    22 | bb       |\n+-------+----------+\n</code></pre> <p>You can easily time travel back to version 0 of the Delta table:</p> <pre><code>DeltaTable(\"tmp/some-table\", version=0).to_pandas()\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre> <p>You can also time travel to version 1 of the Delta table:</p> <pre><code>DeltaTable(\"tmp/some-table\", version=1).to_pandas()\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre> <p>Time travel is a powerful feature that pandas users cannot access with CSV or Parquet.</p>"},{"location":"integrations/delta-lake-pandas/#schema-enforcement","title":"Schema enforcement","text":"<p>Delta tables only allow you to append DataFrames with a matching schema by default.  
Suppose you have a DataFrame with <code>num</code> and <code>animal</code> columns, which is different from the Delta table that has columns with <code>num</code> and <code>letter</code> columns.</p> <p>Try to append this DataFrame with a mismatched schema to the existing table:</p> <pre><code>df = pd.DataFrame({\"num\": [5, 6], \"animal\": [\"cat\", \"dog\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <p>This transaction will be rejected and will return the following error message:</p> <pre><code>ValueError: Schema of data does not match table schema\nData schema:\nnum: int64\nanimal: string\n-- schema metadata --\npandas: '{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"' + 474\nTable Schema:\nnum: int64\nletter: string\n</code></pre> <p>Schema enforcement protects your table from getting corrupted by appending data with mismatched schema.  Parquet and CSV don't offer schema enforcement for pandas users.</p>"},{"location":"integrations/delta-lake-pandas/#overwriting-schema-of-table","title":"Overwriting schema of table","text":"<p>You can overwrite the table contents and schema by setting the <code>overwrite_schema</code> option.  Here's how to overwrite the table contents:</p> <pre><code>write_deltalake(\"tmp/some-table\", df, mode=\"overwrite\", overwrite_schema=True)\n</code></pre> <p>Here are the contents of the table after the values and schema have been overwritten:</p> <pre><code>+-------+----------+\n|   num | animal   |\n|-------+----------|\n|     5 | cat      |\n|     6 | dog      |\n+-------+----------+\n</code></pre>"},{"location":"integrations/delta-lake-pandas/#in-memory-vs-in-storage-data-changes","title":"In-memory vs. 
in-storage data changes","text":"<p>It's important to distinguish between data stored in-memory and data stored on disk when understanding the functionality offered by Delta Lake.</p> <p>pandas loads data from storage (CSV, Parquet, or Delta Lake) into in-memory DataFrames.</p> <p>pandas makes it easy to modify the data in memory, say update a column value.  It's not easy to update a column value in storage systems like CSV or Parquet using pandas.</p> <p>Delta Lake makes it easy for pandas users to update data in storage.</p>"},{"location":"integrations/delta-lake-pandas/#why-delta-lake-allows-for-faster-queries","title":"Why Delta Lake allows for faster queries","text":"<p>Delta tables store data in many files and metadata about the files in the transaction log.  Delta Lake allows for certain queries to skip entire files, which makes pandas queries run much faster.</p>"},{"location":"integrations/delta-lake-pandas/#more-resources","title":"More resources","text":"<p>See this talk on why Delta Lake is the best file format for pandas analyses to learn more: </p>"},{"location":"integrations/delta-lake-pandas/#conclusion","title":"Conclusion","text":"<p>Delta Lake provides many features that make it an excellent format for pandas analyses:</p> <ul> <li>performance optimizations make pandas queries run faster</li> <li>data management features make pandas analyses more reliable</li> <li>advanced features allow you to perform more complex pandas analyses</li> </ul> <p>Python deltalake offers pandas users a better experience compared with CSV/Parquet.</p>"},{"location":"integrations/delta-lake-polars/","title":"Using Delta Lake with polars","text":"<p>This page explains why Delta Lake is a great storage system for Polars analyses.</p> <p>You will learn how to create Delta tables with Polars, how to query Delta tables with Polars, and the unique advantages Delta Lake offers the Polars community.</p> <p>Here are some amazing benefits that Delta Lake provides Polars 
users:</p> <ul> <li>time travel</li> <li>ACID transactions for reliable writes</li> <li>better performance with file skipping</li> <li>enhanced file skipping via Z Ordering</li> <li>ability to rollback mistakes</li> <li>and many, many more</li> </ul> <p>Let's start by showing how to use Polars with Delta Lake, explore how Delta Lake can make Polars queries run faster, and then look at all the cool features Delta Lake offers Polars users.</p>"},{"location":"integrations/delta-lake-polars/#creating-a-delta-lake-table-with-polars","title":"Creating a Delta Lake table with Polars","text":"<p>Create a Polars DataFrame and write it out to a Delta table:</p> <pre><code>import polars as pl\n\ndf = pl.DataFrame({\"x\": [1, 2, 3]})\ndf.write_delta(\"tmp/bear_delta_lake\")\n</code></pre> <p>Inspect the contents of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n+-----+\n</code></pre> <p>Now create another Polars DataFrame and append it to the existing Delta table:</p> <pre><code>df2 = pl.DataFrame({\"x\": [8, 9, 10]})\ndf2.write_delta(\"tmp/bear_delta_lake\", mode=\"append\")\n</code></pre> <p>Re-inspect the contents of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n| 8   |\n| 9   |\n| 10  |\n+-----+\n</code></pre> <p>Now overwrite the existing Delta table:</p> <pre><code>df3 = pl.DataFrame({\"x\": [55, 66, 77]})\ndf3.write_delta(\"tmp/bear_delta_lake\", mode=\"overwrite\")\n</code></pre> <p>Inspect the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 55  |\n| 66  |\n| 77  |\n+-----+\n</code></pre> <p>The Delta table now has three versions, as shown in the following diagram:</p> <p></p>"},{"location":"integrations/delta-lake-polars/#time-travel-with-delta-lake-for-polars","title":"Time travel 
with Delta Lake for Polars","text":"<p>Time travel back to version 0 of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\", version=0))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n+-----+\n</code></pre> <p>Time travel back to version 1 of the Delta table:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\", version=1))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 1   |\n| 2   |\n| 3   |\n| 9   |\n| 8   |\n| 10  |\n+-----+\n</code></pre> <p>Read the Delta table without specifying a version and see how it reads the latest version by default:</p> <pre><code>print(pl.read_delta(\"tmp/bear_delta_lake\"))\n\n+-----+\n| x   |\n| --- |\n| i64 |\n+=====+\n| 55  |\n| 66  |\n| 77  |\n+-----+\n</code></pre> <p>Let's dive into how to read Delta tables with Polars in more detail and compare the query runtime performance on larger datasets.</p>"},{"location":"integrations/delta-lake-polars/#reading-a-delta-lake-table-with-polars","title":"Reading a Delta Lake table with Polars","text":"<p>Let's look at the h2o groupby dataset that has 1 billion rows and 9 columns.  Here are the first three rows of the dataset:</p> <pre><code>+-------+-------+--------------+-------+-------+--------+------+------+---------+\n| id1   | id2   | id3          |   id4 |   id5 |    id6 |   v1 |   v2 |      v3 |\n|-------+-------+--------------+-------+-------+--------+------+------+---------|\n| id016 | id046 | id0000109363 |    88 |    13 | 146094 |    4 |    6 | 18.8377 |\n| id039 | id087 | id0000466766 |    14 |    30 | 111330 |    4 |   14 | 46.7973 |\n| id047 | id098 | id0000307804 |    85 |    23 | 187639 |    3 |    5 | 47.5773 |\n+-------+-------+--------------+-------+-------+--------+------+------+---------+\n</code></pre> <p>This dataset is 50GB when stored in an uncompressed CSV file.  
Let's run some queries on this dataset when it's stored in different file formats with Polars.</p> <p>This section will show the runtime for a query when the data is stored in CSV, Parquet, and Delta Lake and explain why Delta tables are the fastest.</p> <p></p> <p>Start by running a query on an uncompressed CSV file with <code>read_csv</code>:</p> <pre><code>pl.read_csv(\"~/data/G1_1e9_1e2_0_0.csv\").filter(pl.col(\"id1\") &lt; \"id016\").group_by(\n    [\"id1\", \"id2\"]\n).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query errors out after running for several minutes.  The machine runs out of memory.  Let's try it again with <code>scan_csv</code>.</p> <pre><code>pl.scan_csv(\"~/data/G1_1e9_1e2_0_0.csv\").filter(pl.col(\"id1\") &lt; \"id016\").group_by(\n    [\"id1\", \"id2\"]\n).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 56.2 seconds.</p> <p>Now let's run the same query when the data is stored in a Parquet file:</p> <pre><code>pl.scan_parquet(\"~/data/G1_1e9_1e2_0_0.parquet\").filter(\n    pl.col(\"id1\") &lt; \"id016\"\n).group_by([\"id1\", \"id2\"]).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 8.3 seconds.  It's much faster because Polars is optimized to skip row groups in Parquet files that don't contain data that's relevant for the query.</p> <p>Then run the query on newly created Delta table:</p> <pre><code>pl.scan_delta(\"~/data/deltalake/G1_1e9_1e2_0_0\", version=1).filter(\n    pl.col(\"id1\") &lt; \"id016\"\n).group_by([\"id1\", \"id2\"]).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 7.2 seconds.  
Polars can run this query faster because it can inspect the Delta transaction log and skip entire files that don't contain relevant data before performing the ordinary Parquet row group skipping.</p> <p>Finally run the query on the Delta table after it has been Z Ordered by <code>id1</code>:</p> <pre><code>pl.scan_delta(\"~/data/deltalake/G1_1e9_1e2_0_0\", version=2).filter(\n    pl.col(\"id1\") &lt; \"id016\"\n).group_by([\"id1\", \"id2\"]).agg(pl.sum(\"v1\").alias(\"v1_sum\")).collect()\n</code></pre> <p>This query runs in 3.5 seconds.  The query on the Z Ordered Delta table is even faster because similar data has been co-located in the same files.  This allows for even greater data skipping.</p> <p>Polars can leverage file skipping to query Delta tables very quickly.</p>"},{"location":"integrations/delta-lake-polars/#why-polars-is-fast-with-delta-lake","title":"Why Polars is fast with Delta Lake","text":"<p>Delta tables consist of metadata in a transaction log and data stored in Parquet files.</p> <p>When Polars queries a Delta table, it starts by consulting the transaction log to understand the metadata of each file in the Delta table.  This allows for Polars to quickly identify which files should be skipped by the query.</p> <p>CSV files don't contain any such metadata, so file skipping isn't an option.  Polars can skip Parquet files based on metadata, but it needs to open up each file and read the metadata, which is slower than grabbing the file-level metadata directly from the transaction log.</p> <p>Parquet doesn't allow users to easily Z Order the data and colocate similar data in the same row groups.  
The Z Order optimizations are only supported in Delta tables.</p> <p>Delta Lake offers Polars users unique performance optimizations.</p>"},{"location":"integrations/delta-lake-polars/#other-delta-lake-features-relevant-for-polars-users","title":"Other Delta Lake features relevant for Polars users","text":"<ul> <li>ACID transactions for reliable writes</li> <li>better performance with file skipping</li> <li>enhanced file skipping via Z Ordering</li> <li>ability to rollback mistakes</li> </ul>"},{"location":"integrations/delta-lake-polars/#conclusion","title":"Conclusion","text":"<p>This guide shows how Delta Lake is a great storage format for Polars analyses.</p> <p>Delta Lake is easy to use, fast, and full of features that are great for Polars users.</p>"},{"location":"usage/appending-overwriting-delta-lake-table/","title":"Appending to and overwriting a Delta Lake table","text":"<p>This section explains how to append to an existing Delta table and how to overwrite a Delta table.</p>"},{"location":"usage/appending-overwriting-delta-lake-table/#delta-lake-append-transactions","title":"Delta Lake append transactions","text":"<p>Suppose you have a Delta table with the following contents:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre> <p>Append two additional rows of data to the table:</p> <pre><code>from deltalake import write_deltalake, DeltaTable\n\ndf = pd.DataFrame({\"num\": [8, 9], \"letter\": [\"dd\", \"ee\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"append\")\n</code></pre> <p>Here are the updated contents of the Delta table:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre> <p>Now let's see how to perform an overwrite 
transaction.</p>"},{"location":"usage/appending-overwriting-delta-lake-table/#delta-lake-overwrite-transactions","title":"Delta Lake overwrite transactions","text":"<p>Now let's see how to overwrite the existing Delta table.</p> <pre><code>df = pd.DataFrame({\"num\": [11, 22], \"letter\": [\"aa\", \"bb\"]})\nwrite_deltalake(\"tmp/some-table\", df, mode=\"overwrite\")\n</code></pre> <p>Here are the contents of the Delta table after the overwrite operation:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|    11 | aa       |\n|    22 | bb       |\n+-------+----------+\n</code></pre> <p>Overwriting just performs a logical delete.  It doesn't physically remove the previous data from storage.  Time travel back to the previous version to confirm that the old version of the table is still accessible.</p> <pre><code>dt = DeltaTable(\"tmp/some-table\", version=1)\n\n+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     8 | dd       |\n|     9 | ee       |\n+-------+----------+\n</code></pre>"},{"location":"usage/constraints/","title":"Adding a Constraint to a table","text":"<p>Check constraints are a way to enforce that only data that meets the constraint is allowed to be added to the table.</p>"},{"location":"usage/constraints/#add-the-constraint","title":"Add the Constraint","text":"Python Rust <p> <code>DeltaTable</code> <pre><code>from deltalake import DeltaTable\n\ndt = DeltaTable(\"../rust/tests/data/simple_table\")\n\n# Check the schema before hand\nprint(dt.schema())\n# Add the constraint to the table.\ndt.alter.add_constraint({\"id_gt_0\": \"id &gt; 0\"})\n</code></pre></p> <p> <code>DeltaTable</code> <pre><code>let table = deltalake::open_table(\"../rust/tests/data/simple_table\").await?;\nlet ops = DeltaOps(table);\nops.with_constraint(\"id_gt_0\", \"id &gt; 0\").await?;\n</code></pre></p> <p>After you have added the constraint to the table 
attempting to append data to the table that violates the constraint will instead throw an error.</p>"},{"location":"usage/constraints/#verify-the-constraint-by-trying-to-add-some-data","title":"Verify the constraint by trying to add some data","text":"Python Rust <pre><code>from deltalake import write_deltalake\nimport pandas as pd\n\ndf = pd.DataFrame({\"id\": [-1]})\nwrite_deltalake(dt, df, mode=\"append\", engine=\"rust\")\n# _internal.DeltaProtocolError: Invariant violations: [\"Check or Invariant (id &gt; 0) violated by value in row: [-1]\"]\n</code></pre> <pre><code>let table = deltalake::open_table(\"../rust/tests/data/simple_table\").await?;\nlet schema = table.get_state().arrow_schema()?;\nlet invalid_values: Vec&lt;Arc&lt;dyn Array&gt;&gt; = vec![\n    Arc::new(Int32Array::from(vec![-10]))\n];\nlet batch = RecordBatch::try_new(schema, invalid_values)?;\ntable.write(vec![batch]).await?;\n</code></pre> <p>Note: ensure you use the <code>engine='rust'</code> parameter when writing to the table as this feature is not supported in the default pyarrow writer. 
</p>"},{"location":"usage/create-delta-lake-table/","title":"Creating a Delta Lake Table","text":"<p>This section explains how to create a Delta Lake table.</p> <p>You can easily write a DataFrame to a Delta table.</p> pandasPolars <pre><code>from deltalake import write_deltalake\nimport pandas as pd\n\ndf = pd.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\nwrite_deltalake(\"tmp/some-table\", df)\n</code></pre> <pre><code>import polars as pl\n\ndf = pl.DataFrame({\"num\": [1, 2, 3], \"letter\": [\"a\", \"b\", \"c\"]})\ndf.write_delta(\"tmp/some-table\")\n</code></pre> <p>Here are the contents of the Delta table in storage:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n+-------+----------+\n</code></pre>"},{"location":"usage/deleting-rows-from-delta-lake-table/","title":"Deleting rows from a Delta Lake table","text":"<p>This section explains how to delete rows from a Delta Lake table.</p> <p>Suppose you have the following Delta table with four rows:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n|     3 | c        |\n|     4 | d        |\n+-------+----------+\n</code></pre> <p>Here's how to delete all the rows where the <code>num</code> is greater than 2:</p> <pre><code>dt = DeltaTable(\"tmp/my-table\")\ndt.delete(\"num &gt; 2\")\n</code></pre> <p>Here are the contents of the Delta table after the delete operation has been performed:</p> <pre><code>+-------+----------+\n|   num | letter   |\n|-------+----------|\n|     1 | a        |\n|     2 | b        |\n+-------+----------+\n</code></pre>"},{"location":"usage/examining-table/","title":"Examining a Table","text":""},{"location":"usage/examining-table/#metadata","title":"Metadata","text":"<p>The delta log maintains basic metadata about a table, including:</p> <ul> <li>A unique <code>id</code></li> <li>A 
<code>name</code>, if provided</li> <li>A <code>description</code>, if provided</li> <li>The list of <code>partitionColumns</code>.</li> <li>The <code>created_time</code> of the table</li> <li>A map of table <code>configuration</code>. This includes fields such as     <code>delta.appendOnly</code>, which if <code>true</code> indicates the table is not meant     to have data deleted from it.</li> </ul> <p>Get metadata from a table with the DeltaTable.metadata() method:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.metadata()\nMetadata(id: 5fba94ed-9794-4965-ba6e-6ee3c0d22af9, name: None, description: None, partitionColumns: [], created_time: 1587968585495, configuration={})\n</code></pre>"},{"location":"usage/examining-table/#schema","title":"Schema","text":"<p>The schema for the table is also saved in the transaction log. It can either be retrieved in the Delta Lake form as Schema or as a PyArrow schema. The first allows you to introspect any column-level metadata stored in the schema, while the latter represents the schema the table will be loaded into.</p> <p>Use DeltaTable.schema to retrieve the delta lake schema:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.schema()\nSchema([Field(id, PrimitiveType(\"long\"), nullable=True)])\n</code></pre> <p>These schemas have a JSON representation that can be retrieved. 
To retrieve it, use DeltaTable.schema.to_json(); to reconstruct a schema from JSON, use Schema.from_json().</p> <pre><code>&gt;&gt;&gt; dt.schema().to_json()\n'{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}'\n</code></pre> <p>Use DeltaTable.schema.to_pyarrow() to retrieve the PyArrow schema:</p> <pre><code>&gt;&gt;&gt; dt.schema().to_pyarrow()\nid: int64\n</code></pre>"},{"location":"usage/examining-table/#history","title":"History","text":"<p>Depending on what system wrote the table, the delta table may have provenance information describing what operations were performed on the table, when, and by whom. This information is retained for 30 days by default, unless otherwise specified by the table configuration <code>delta.logRetentionDuration</code>.</p> <p>Note</p> <p>This information is not written by all writers and different writers may use different schemas to encode the actions. For Spark\\'s format, see: https://docs.delta.io/latest/delta-utility.html#history-schema</p> <p>To view the available history, use <code>DeltaTable.history</code>:</p> <pre><code>from deltalake import DeltaTable\n\ndt = DeltaTable(\"../rust/tests/data/simple_table\")\ndt.history()\n</code></pre> <pre><code>[{'timestamp': 1587968626537, 'operation': 'DELETE', 'operationParameters': {'predicate': '[\"((`id` % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT))\"]'}, 'readVersion': 3, 'isBlindAppend': False},\n {'timestamp': 1587968614187, 'operation': 'UPDATE', 'operationParameters': {'predicate': '((id#697L % cast(2 as bigint)) = cast(0 as bigint))'}, 'readVersion': 2, 'isBlindAppend': False},\n {'timestamp': 1587968604143, 'operation': 'WRITE', 'operationParameters': {'mode': 'Overwrite', 'partitionBy': '[]'}, 'readVersion': 1, 'isBlindAppend': False},\n {'timestamp': 1587968596254, 'operation': 'MERGE', 'operationParameters': {'predicate': '(oldData.`id` = newData.`id`)'}, 'readVersion': 0, 'isBlindAppend': False},\n {'timestamp': 1587968586154, 'operation': 'WRITE', 
'operationParameters': {'mode': 'ErrorIfExists', 'partitionBy': '[]'}, 'isBlindAppend': True}]\n</code></pre>"},{"location":"usage/examining-table/#current-add-actions","title":"Current Add Actions","text":"<p>The active state for a delta table is determined by the Add actions, which provide the list of files that are part of the table and metadata about them, such as creation time, size, and statistics. You can get a data frame of the add actions data using <code>DeltaTable.get_add_actions</code>:</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.8.0\")\n&gt;&gt;&gt; dt.get_add_actions(flatten=True).to_pandas()\n                                                    path  size_bytes   modification_time  data_change  num_records  null_count.value  min.value  max.value\n0  part-00000-c9b90f86-73e6-46c8-93ba-ff6bfaf892a...         440 2021-03-06 15:16:07         True            2                 0          0          2\n1  part-00000-04ec9591-0b73-459e-8d18-ba5711d6cbe...         440 2021-03-06 15:16:16         True            2                 0          2          4\n</code></pre> <p>This works even with past versions of the table:</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.8.0\", version=0)\n&gt;&gt;&gt; dt.get_add_actions(flatten=True).to_pandas()\n                                                path  size_bytes   modification_time  data_change  num_records  null_count.value  min.value  max.value\n0  part-00000-c9b90f86-73e6-46c8-93ba-ff6bfaf892a...         440 2021-03-06 15:16:07         True            2                 0          0          2\n1  part-00001-911a94a2-43f6-4acb-8620-5e68c265498...         
445 2021-03-06 15:16:07         True            3                 0          2          4\n</code></pre>"},{"location":"usage/installation/","title":"Installation","text":"<p>The <code>deltalake</code> project can be installed via pip for Python or Cargo for Rust.</p>"},{"location":"usage/installation/#install-delta-lake-for-python","title":"Install Delta Lake for Python","text":"<p>With pip:</p> <pre><code>pip install deltalake\n</code></pre> <p>With Conda:</p> <pre><code>conda install -c conda-forge deltalake\n</code></pre>"},{"location":"usage/installation/#install-delta-lake-for-rust","title":"Install Delta Lake for Rust","text":"<p>With Cargo:</p> <pre><code>cargo add deltalake\n</code></pre>"},{"location":"usage/installation/#run-delta-lake-and-pandas-in-a-jupyter-notebook","title":"Run Delta Lake and pandas in a Jupyter Notebook","text":"<p>You can easily run Delta Lake and pandas in a Jupyter notebook.</p> <p>Create an environment file with the dependencies as follows:</p> <pre><code>name: deltalake-minimal\nchannels:\n  - conda-forge\n  - defaults\ndependencies:\n  - python=3.11\n  - ipykernel\n  - pandas\n  - polars\n  - jupyterlab\n  - pip\n  - pip:\n    - deltalake\n</code></pre> <p>Create a virtual environment with the dependencies:</p> <pre><code>conda env create -f deltalake-minimal.yml\n</code></pre> <p>Open the Jupyter notebook and run commands as follows:</p> <p></p>"},{"location":"usage/loading-table/","title":"Loading a Delta Table","text":"<p>A DeltaTable represents the state of a delta table at a particular version. 
This includes which files are currently part of the table, the schema of the table, and other metadata such as creation time.</p>  Python Rust <p> <code>DeltaTable</code> <pre><code>from deltalake import DeltaTable\n\ndt = DeltaTable(\"../rust/tests/data/delta-0.2.0\")\nprint(f\"Version: {dt.version()}\")\nprint(f\"Files: {dt.files()}\")\n</code></pre></p> <p> <code>DeltaTable</code> <pre><code>let table = deltalake::open_table(\"../rust/tests/data/simple_table\").await.unwrap();\nprintln!(\"Version: {}\", table.version());\nprintln!(\"Files: {}\", table.get_files());\n</code></pre></p> <p>Depending on your storage backend, you could use the <code>storage_options</code> parameter to provide some configuration. Configuration is defined for specific backends - s3 options, azure options, gcs options.</p> <pre><code>&gt;&gt;&gt; storage_options = {\"AWS_ACCESS_KEY_ID\": \"THE_AWS_ACCESS_KEY_ID\", \"AWS_SECRET_ACCESS_KEY\":\"THE_AWS_SECRET_ACCESS_KEY\"}\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.2.0\", storage_options=storage_options)\n</code></pre> <p>The configuration can also be provided via the environment, and the basic service provider is derived from the URL being used. We try to support many of the well-known formats to identify basic service properties.</p> <p>S3:</p> <ul> <li>s3://\\&lt;bucket&gt;/\\&lt;path&gt;</li> <li>s3a://\\&lt;bucket&gt;/\\&lt;path&gt;</li> </ul> <p>Azure:</p> <ul> <li>az://\\&lt;container&gt;/\\&lt;path&gt;</li> <li>adl://\\&lt;container&gt;/\\&lt;path&gt;</li> <li>abfs://\\&lt;container&gt;/\\&lt;path&gt;</li> </ul> <p>GCS:</p> <ul> <li>gs://\\&lt;bucket&gt;/\\&lt;path&gt;</li> </ul> <p>Alternatively, if you have a data catalog you can load it by reference to a database and table name. 
Currently only AWS Glue is supported.</p> <p>For AWS Glue catalog, use AWS environment variables to authenticate.</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; from deltalake import DataCatalog\n&gt;&gt;&gt; database_name = \"simple_database\"\n&gt;&gt;&gt; table_name = \"simple_table\"\n&gt;&gt;&gt; data_catalog = DataCatalog.AWS\n&gt;&gt;&gt; dt = DeltaTable.from_data_catalog(data_catalog=data_catalog, database_name=database_name, table_name=table_name)\n&gt;&gt;&gt; dt.to_pyarrow_table().to_pydict()\n{'id': [5, 7, 9, 5, 6, 7, 8, 9]}\n</code></pre>"},{"location":"usage/loading-table/#custom-storage-backends","title":"Custom Storage Backends","text":"<p>While delta always needs its internal storage backend to work and be properly configured, in order to manage the delta log, it may sometime be advantageous - and is common practice in the arrow world - to customize the storage interface used for reading the bulk data.</p> <p><code>deltalake</code> will work with any storage compliant with <code>pyarrow.fs.FileSystem</code>, however the root of the filesystem has to be adjusted to point at the root of the Delta table. We can achieve this by wrapping the custom filesystem into a <code>pyarrow.fs.SubTreeFileSystem</code>.</p> <pre><code>import pyarrow.fs as fs\nfrom deltalake import DeltaTable\n\npath = \"&lt;path/to/table&gt;\"\nfilesystem = fs.SubTreeFileSystem(path, fs.LocalFileSystem())\n\ndt = DeltaTable(path)\nds = dt.to_pyarrow_dataset(filesystem=filesystem)\n</code></pre> <p>When using the pyarrow factory method for file systems, the normalized path is provided on creation. 
In case of S3 this would look something like:</p> <pre><code>import pyarrow.fs as fs\nfrom deltalake import DeltaTable\n\ntable_uri = \"s3://&lt;bucket&gt;/&lt;path&gt;\"\nraw_fs, normalized_path = fs.FileSystem.from_uri(table_uri)\nfilesystem = fs.SubTreeFileSystem(normalized_path, raw_fs)\n\ndt = DeltaTable(table_uri)\nds = dt.to_pyarrow_dataset(filesystem=filesystem)\n</code></pre>"},{"location":"usage/loading-table/#time-travel","title":"Time Travel","text":"<p>To load previous table states, you can provide the version number you wish to load:</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\", version=2)\n</code></pre> <p>Once you\\'ve loaded a table, you can also change versions using either a version number or datetime string:</p> <pre><code>&gt;&gt;&gt; dt.load_version(1)\n&gt;&gt;&gt; dt.load_with_datetime(\"2021-11-04 00:05:23.283+00:00\")\n</code></pre> <p>Warning</p> <p>Previous table versions may not exist if they have been vacuumed, in which case an exception will be thrown. See Vacuuming tables for more information.</p>"},{"location":"usage/managing-tables/","title":"Managing Delta Tables","text":""},{"location":"usage/managing-tables/#vacuuming-tables","title":"Vacuuming tables","text":"<p>Vacuuming a table will delete any files that have been marked for deletion. This may make some past versions of a table invalid, so this can break time travel. However, it will save storage space. Vacuum will retain files in a certain window, by default one week, so time travel will still work in shorter ranges.</p> <p>Delta tables usually don't delete old files automatically, so vacuuming regularly is considered good practice, unless the table is only appended to.</p> <p>Use <code>DeltaTable.vacuum</code> to perform the vacuum operation. Note that to prevent accidental deletion, the function performs a dry-run by default: it will only list the files to be deleted. 
Pass <code>dry_run=False</code> to actually delete files.</p> <pre><code>&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/simple_table\")\n&gt;&gt;&gt; dt.vacuum()\n['../rust/tests/data/simple_table/part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet',\n '../rust/tests/data/simple_table/part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet',\n '../rust/tests/data/simple_table/part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet',\n ...]\n&gt;&gt;&gt; dt.vacuum(dry_run=False) # Don't run this unless you are sure!\n</code></pre>"},{"location":"usage/managing-tables/#optimizing-tables","title":"Optimizing tables","text":"<p>Optimizing tables is not currently supported.</p>"},{"location":"usage/overview/","title":"Usage","text":"<p>This guide teaches you how to use Delta Lake.  You will learn how to create Delta tables, run queries, perform DML operations, and optimize your tables.</p> <p>It's easy to use Delta Lake with pandas, Polars, Rust, or any other PyArrow-like DataFrame library.</p> <p>See the Spark Delta Lake documentation if you're using Delta Lake with Spark.</p>"},{"location":"usage/querying-delta-tables/","title":"Querying Delta Tables","text":"<p>Delta tables can be queried in several ways. By loading as Arrow data or an Arrow dataset, they can be used by compatible engines such as Pandas and DuckDB. By passing on the list of files, they can be loaded into other engines such as Dask.</p> <p>Delta tables are often larger than can fit into memory on a single computer, so this module provides ways to read only the parts of the data you need. Partition filters allow you to skip reading files that are part of irrelevant partitions. Only loading the columns required also saves memory. 
Finally, some methods allow reading tables batch-by-batch, allowing you to process the whole table while only having a portion loaded at any given time.</p> <p>To load into Pandas or a PyArrow table use the <code>DeltaTable.to_pandas</code> and <code>DeltaTable.to_pyarrow_table</code> methods, respectively. Both of these support filtering partitions and selecting particular columns.</p> <pre><code>&gt;&gt;&gt; from deltalake import DeltaTable\n&gt;&gt;&gt; dt = DeltaTable(\"../rust/tests/data/delta-0.8.0-partitioned\")\n&gt;&gt;&gt; dt.schema().to_pyarrow()\nvalue: string\nyear: string\nmonth: string\nday: string\n&gt;&gt;&gt; dt.to_pandas(partitions=[(\"year\", \"=\", \"2021\")], columns=[\"value\"])\n      value\n0     6\n1     7\n2     5\n3     4\n&gt;&gt;&gt; dt.to_pyarrow_table(partitions=[(\"year\", \"=\", \"2021\")], columns=[\"value\"])\npyarrow.Table\nvalue: string\n</code></pre> <p>Converting to a PyArrow Dataset allows you to filter on columns other than partition columns and load the result as a stream of batches rather than a single table. Convert to a dataset using <code>DeltaTable.to_pyarrow_dataset</code>. 
Filters applied to datasets will use the partition values and file statistics from the Delta transaction log and push down any other filters to the scanning operation.</p> <pre><code>&gt;&gt;&gt; import pyarrow.dataset as ds\n&gt;&gt;&gt; dataset = dt.to_pyarrow_dataset()\n&gt;&gt;&gt; condition = (ds.field(\"year\") == \"2021\") &amp; (ds.field(\"value\") &gt; \"4\")\n&gt;&gt;&gt; dataset.to_table(filter=condition, columns=[\"value\"]).to_pandas()\n  value\n0     6\n1     7\n2     5\n&gt;&gt;&gt; batch_iter = dataset.to_batches(filter=condition, columns=[\"value\"], batch_size=2)\n&gt;&gt;&gt; for batch in batch_iter: print(batch.to_pandas())\n  value\n0     6\n1     7\n  value\n0     5\n</code></pre> <p>PyArrow datasets may also be passed to compatible query engines, such as DuckDB</p> <pre><code>&gt;&gt;&gt; import duckdb\n&gt;&gt;&gt; ex_data = duckdb.arrow(dataset)\n&gt;&gt;&gt; ex_data.filter(\"year = 2021 and value &gt; 4\").project(\"value\")\n---------------------\n-- Expression Tree --\n---------------------\nProjection [value]\n  Filter [year=2021 AND value&gt;4]\n    arrow_scan(140409099470144, 4828104688, 1000000)\n\n---------------------\n-- Result Columns  --\n---------------------\n- value (VARCHAR)\n\n---------------------\n-- Result Preview  --\n---------------------\nvalue\nVARCHAR\n[ Rows: 3]\n6\n7\n5\n</code></pre> <p>Finally, you can always pass the list of file paths to an engine. For example, you can pass them to <code>dask.dataframe.read_parquet</code>:</p> <pre><code>&gt;&gt;&gt; import dask.dataframe as dd\n&gt;&gt;&gt; df = dd.read_parquet(dt.file_uris())\n&gt;&gt;&gt; df\nDask DataFrame Structure:\n                value             year            month              day\nnpartitions=6\n               object  category[known]  category[known]  category[known]\n                  ...              ...              ...              ...\n...               ...              ...              ...              ...\n                  ...             
 ...              ...              ...\n                  ...              ...              ...              ...\nDask Name: read-parquet, 6 tasks\n&gt;&gt;&gt; df.compute()\n  value  year month day\n0     1  2020     1   1\n0     2  2020     2   3\n0     3  2020     2   5\n0     4  2021     4   5\n0     5  2021    12   4\n0     6  2021    12  20\n1     7  2021    12  20\n</code></pre>"},{"location":"usage/writing-delta-tables/","title":"Writing Delta Tables","text":"<p>For overwrites and appends, use <code>write_deltalake</code>. If the table does not already exist, it will be created. The <code>data</code> parameter will accept a Pandas DataFrame, a PyArrow Table, or an iterator of PyArrow Record Batches.</p> <pre><code>&gt;&gt;&gt; from deltalake import write_deltalake\n&gt;&gt;&gt; df = pd.DataFrame({'x': [1, 2, 3]})\n&gt;&gt;&gt; write_deltalake('path/to/table', df)\n</code></pre> <p>Note: <code>write_deltalake</code> accepts a Pandas DataFrame, but will convert it to a Arrow table before writing. See caveats in <code>pyarrow:python/pandas</code>.</p> <p>By default, writes create a new table and error if it already exists. This is controlled by the <code>mode</code> parameter, which mirrors the behavior of Spark's <code>pyspark.sql.DataFrameWriter.saveAsTable</code> DataFrame method. To overwrite pass in <code>mode='overwrite'</code> and to append pass in <code>mode='append'</code>:</p> <pre><code>&gt;&gt;&gt; write_deltalake('path/to/table', df, mode='overwrite')\n&gt;&gt;&gt; write_deltalake('path/to/table', df, mode='append')\n</code></pre> <p><code>write_deltalake</code> will raise <code>ValueError</code> if the schema of the data passed to it differs from the existing table's schema. 
If you wish to alter the schema as part of an overwrite pass in <code>overwrite_schema=True</code>.</p>"},{"location":"usage/writing-delta-tables/#overwriting-a-partition","title":"Overwriting a partition","text":"<p>You can overwrite a specific partition by using <code>mode=\"overwrite\"</code> together with <code>partition_filters</code>. This will remove all files within the matching partition and insert your data as new files. This can only be done on one partition at a time. All of the input data must belong to that partition or else the method will raise an error.</p> <pre><code>&gt;&gt;&gt; from deltalake import write_deltalake\n&gt;&gt;&gt; df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'a', 'b']})\n&gt;&gt;&gt; write_deltalake('path/to/table', df, partition_by=['y'])\n\n&gt;&gt;&gt; table = DeltaTable('path/to/table')\n&gt;&gt;&gt; df2 = pd.DataFrame({'x': [100], 'y': ['b']})\n&gt;&gt;&gt; write_deltalake(table, df2, partition_filters=[('y', '=', 'b')], mode=\"overwrite\")\n\n&gt;&gt;&gt; table.to_pandas()\n     x  y\n0    1  a\n1    2  a\n2  100  b\n</code></pre> <p>This method could also be used to insert a new partition if one doesn't already exist, making this operation idempotent.</p>"},{"location":"usage/optimize/delta-lake-z-order/","title":"Delta Lake Z Order","text":"<p>This section explains how to Z Order a Delta table.</p> <p>Z Ordering colocates similar data in the same files, which allows for better file skipping and faster queries.</p> <p>Suppose you have a table with <code>first_name</code>, <code>age</code>, and <code>country</code> columns.</p> <p>If you Z Order the data by the <code>country</code> column, then individuals from the same country will be stored in the same files.  
When you subsequently query the data for individuals from a given country, it will execute faster because more data can be skipped.</p> <p>Here's how to Z Order a Delta table:</p> <pre><code>dt = DeltaTable(\"tmp\")\ndt.optimize.z_order([country])\n</code></pre>"},{"location":"usage/optimize/small-file-compaction-with-optimize/","title":"Delta Lake small file compaction with optimize","text":"<p>This post shows you how to perform small file compaction using the <code>optimize</code> method.  This was added to the <code>DeltaTable</code> class in version 0.9.0.  This command rearranges the small files into larger files which will reduce the number of files and speed up queries.</p> <p>This is very helpful for workloads that append frequently. For example, if you have a table that is appended to every 10 minutes, after a year you will have 52,560 files in the table. If the table is partitioned by another dimension, you will have 52,560 files per partition; with just 100 unique values that's millions of files. By running <code>optimize</code> periodically, you can reduce the number of files in the table to a more manageable number.</p> <p>Typically, you will run optimize less frequently than you append data. If possible, you might run optimize once you know you have finished writing to a particular partition. For example, on a table partitioned by date, you might append data every 10 minutes, but only run optimize once a day at the end of the day. This will ensure you don't need to compact the same data twice.</p> <p>This section will also teach you about how to use <code>vacuum</code> to physically remove files from storage that are no longer needed.  You\u2019ll often want vacuum after running optimize to remove the small files from storage once they\u2019ve been compacted into larger files.</p> <p>Let\u2019s start with an example to explain these key concepts.  
All the code covered in this post is stored in this notebook in case you\u2019d like to follow along.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#create-a-delta-table-with-small-files","title":"Create a Delta table with small files","text":"<p>Let\u2019s start by creating a Delta table with a lot of small files so we can demonstrate the usefulness of the <code>optimize</code> command.</p> <p>Start by writing a function that generates one thousand rows of random data given a timestamp.</p> <pre><code>def record_observations(date: datetime) -&gt; pa.Table:\n    \"\"\"Pulls data for a certain datetime\"\"\"\n    nrows = 1000\n    return pa.table(\n        {\n            \"date\": pa.array([date.date()] * nrows),\n            \"timestamp\": pa.array([date] * nrows),\n            \"value\": pc.random(nrows),\n        }\n    )\n</code></pre> <p>Let\u2019s run this function and observe the output:</p> <pre><code>record_observations(datetime(2021, 1, 1, 12)).to_pandas()\n\n    date                timestamp   value\n0   2021-01-01  2021-01-01 12:00:00 0.3186397383362023\n1   2021-01-01  2021-01-01 12:00:00 0.04253766974259088\n2   2021-01-01  2021-01-01 12:00:00 0.9355682965171573\n\u2026\n999 2021-01-01  2021-01-01 12:00:00 0.23207037062879843\n</code></pre> <p>Let\u2019s write 100 hours worth of data to the Delta table.</p> <pre><code># Every hour starting at midnight on 2021-01-01\nhours_iter = (datetime(2021, 1, 1) + timedelta(hours=i) for i in itertools.count())\n\n# Write 100 hours worth of data\nfor timestamp in itertools.islice(hours_iter, 100):\n    write_deltalake(\n        \"observation_data\",\n        record_observations(timestamp),\n        partition_by=[\"date\"],\n        mode=\"append\",\n    )\n</code></pre> <p>This data was appended to the Delta table in 100 separate transactions, so the table will contain 100 transaction log entries and 100 data files.  
You can see the number of files with the <code>files()</code> method.</p> <pre><code>dt = DeltaTable(\"observation_data\")\nlen(dt.files()) # 100\n</code></pre> <p>Here\u2019s how the files are persisted in storage.</p> <pre><code>observation_data\n\u251c\u2500\u2500 _delta_log\n\u2502   \u251c\u2500\u2500 00000000000000000000.json\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 00000000000000000099.json\n\u251c\u2500\u2500 date=2021-01-01\n\u2502   \u251c\u2500\u2500 0-cfe227c6-edd9-4369-a1b0-db4559a2e693-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u251c\u2500\u2500 23-a4ace29e-e73e-40a1-81d3-0f5dc13093de-0.parquet\n\u251c\u2500\u2500 date=2021-01-02\n\u2502   \u251c\u2500\u2500 24-9698b456-66eb-4075-8732-fe56d81edb60-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 47-d3fce527-e018-4c02-8acd-a649f6f523d2-0.parquet\n\u251c\u2500\u2500 date=2021-01-03\n\u2502   \u251c\u2500\u2500 48-fd90a7fa-5a14-42ed-9f59-9fe48d87899d-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 71-5f143ade-8ae2-4854-bdc5-61154175665f-0.parquet\n\u251c\u2500\u2500 date=2021-01-04\n\u2502   \u251c\u2500\u2500 72-477c10fe-dc09-4087-80f0-56006e4a7911-0.parquet\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u2514\u2500\u2500 95-1c92cbce-8af4-4fe4-9c11-832245cf4d40-0.parquet\n\u2514\u2500\u2500 date=2021-01-05\n    \u251c\u2500\u2500 96-1b878ee5-25fd-431a-bc3e-6dcacc96b470-0.parquet\n    \u251c\u2500\u2500 \u2026\n    \u2514\u2500\u2500 99-9650ed63-c195-433d-a86b-9469088c14ba-0.parquet\n</code></pre> <p>Each of these Parquet files are tiny - they\u2019re only 10 KB.  
Let\u2019s see how to compact these tiny files into larger files, which is more efficient for data queries.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#compact-small-files-in-the-delta-table-with-optimize","title":"Compact small files in the Delta table with optimize","text":"<p>Let\u2019s run the optimize command to compact the existing small files into larger files:</p> <pre><code>dt = DeltaTable(\"observation_data\")\n\ndt.optimize()\n</code></pre> <p>Here\u2019s the output of the command:</p> <pre><code>{'numFilesAdded': 5,\n 'numFilesRemoved': 100,\n 'filesAdded': {'min': 39000,\n  'max': 238282,\n  'avg': 198425.6,\n  'totalFiles': 5,\n  'totalSize': 992128},\n 'filesRemoved': {'min': 10244,\n  'max': 10244,\n  'avg': 10244.0,\n  'totalFiles': 100,\n  'totalSize': 1024400},\n 'partitionsOptimized': 5,\n 'numBatches': 1,\n 'totalConsideredFiles': 100,\n 'totalFilesSkipped': 0,\n 'preserveInsertionOrder': True}\n</code></pre> <p>The optimize operation has added 5 new files and marked 100 existing files for removal (this is also known as \u201ctombstoning\u201d files).  It has compacted the 100 tiny files into 5 larger files.</p> <p>Let\u2019s append some more data to the Delta table and see how we can selectively run optimize on the new data that\u2019s added.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#handling-incremental-updates-with-optimize","title":"Handling incremental updates with optimize","text":"<p>Let\u2019s append another 24 hours of data to the Delta table:</p> <pre><code>for timestamp in itertools.islice(hours_iter, 24):\n    write_deltalake(\n        dt,\n        record_observations(timestamp),\n        partition_by=[\"date\"],\n        mode=\"append\",\n    )\n</code></pre> <p>We can use <code>get_add_actions()</code> to introspect the table state. We can see that <code>2021-01-06</code> has only a few hours of data so far, so we don't want to optimize that yet. 
But <code>2021-01-05</code> has all 24 hours of data, so it's ready to be optimized.</p> <pre><code>dt.get_add_actions(flatten=True).to_pandas()[\n    \"partition.date\"\n].value_counts().sort_index()\n\n2021-01-01     1\n2021-01-02     1\n2021-01-03     1\n2021-01-04     1\n2021-01-05    21\n2021-01-06     4\n</code></pre> <p>To optimize a single partition, you can pass in a <code>partition_filters</code> argument specifying which partitions to optimize.</p> <pre><code>dt.optimize(partition_filters=[(\"date\", \"=\", \"2021-01-05\")])\n\n{'numFilesAdded': 1,\n 'numFilesRemoved': 21,\n 'filesAdded': {'min': 238282,\n  'max': 238282,\n  'avg': 238282.0,\n  'totalFiles': 1,\n  'totalSize': 238282},\n 'filesRemoved': {'min': 10244,\n  'max': 39000,\n  'avg': 11613.333333333334,\n  'totalFiles': 21,\n  'totalSize': 243880},\n 'partitionsOptimized': 1,\n 'numBatches': 1,\n 'totalConsideredFiles': 21,\n 'totalFilesSkipped': 0,\n 'preserveInsertionOrder': True}\n</code></pre> <p>This optimize operation tombstones 21 small data files and adds one file with all the existing data properly condensed.  
Let\u2019s take a look at a portion of the <code>_delta_log/00000000000000000125.json</code> file, which is the transaction log entry that corresponds with this incremental optimize command.</p> <pre><code>{\n  \"remove\": {\n    \"path\": \"date=2021-01-05/part-00000-41178aab-2491-488f-943d-8f03867295ee-c000.snappy.parquet\",\n    \"deletionTimestamp\": 1683465499480,\n    \"dataChange\": false,\n    \"extendedFileMetadata\": null,\n    \"partitionValues\": {\n      \"date\": \"2021-01-05\"\n    },\n    \"size\": 39000,\n    \"tags\": null\n  }\n}\n\n{\n  \"remove\": {\n    \"path\": \"date=2021-01-05/101-79ae6fc9-c0cc-49ec-bb94-9aba879ac949-0.parquet\",\n    \"deletionTimestamp\": 1683465499481,\n    \"dataChange\": false,\n    \"extendedFileMetadata\": null,\n    \"partitionValues\": {\n      \"date\": \"2021-01-05\"\n    },\n    \"size\": 10244,\n    \"tags\": null\n  }\n}\n\n\u2026\n\n{\n  \"add\": {\n    \"path\": \"date=2021-01-05/part-00000-4b020a40-c836-4a11-851f-4691370c9f3a-c000.snappy.parquet\",\n    \"size\": 238282,\n    \"partitionValues\": {\n      \"date\": \"2021-01-05\"\n    },\n    \"modificationTime\": 1683465499493,\n    \"dataChange\": false,\n    \"stats\": \"{\\\"numRecords\\\":24000,\\\"minValues\\\":{\\\"value\\\":0.00005581532256615507,\\\"timestamp\\\":\\\"2021-01-05T00:00:00.000Z\\\"},\\\"maxValues\\\":{\\\"timestamp\\\":\\\"2021-01-05T23:00:00.000Z\\\",\\\"value\\\":0.9999911402868216},\\\"nullCount\\\":{\\\"timestamp\\\":0,\\\"value\\\":0}}\",\n    \"tags\": null\n  }\n}\n</code></pre> <p>The transaction log indicates that many files have been tombstoned and one file is added, as expected.</p> <p>The Delta Lake optimize command \u201cremoves\u201d data by marking the data files as removed in the transaction log.  The optimize command doesn\u2019t physically delete the Parquet file from storage.  
Optimize performs a \u201clogical remove\u201d not a \u201cphysical remove\u201d.</p> <p>Delta Lake uses logical operations so you can time travel back to earlier versions of your data.  You can vacuum your Delta table to physically remove Parquet files from storage if you don\u2019t need to time travel and don\u2019t want to pay to store the tombstoned files.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#vacuuming-after-optimizing","title":"Vacuuming after optimizing","text":"<p>The vacuum command deletes all files from storage that are marked for removal in the transaction log and older than the retention period which is 7 days by default.</p> <p>It\u2019s normally a good idea to have a retention period of at least 7 days.  For purposes of this example, we will set the retention period to zero, just so you can see how the files get removed from storage.  Adjusting the retention period in this manner isn\u2019t recommended for production use cases.</p> <p>Let\u2019s run the vacuum command:</p> <pre><code>dt.vacuum(retention_hours=0, enforce_retention_duration=False, dry_run=False)\n</code></pre> <p>The command returns a list of all the files that are removed from storage:</p> <pre><code>['date=2021-01-02/39-a98680f2-0e0e-4f26-a491-18b183f9eb05-0.parquet',\n 'date=2021-01-02/41-e96bc8bb-c571-484c-b534-e897424fb7da-0.parquet',\n \u2026\n 'date=2021-01-01/0-cfe227c6-edd9-4369-a1b0-db4559a2e693-0.parquet',\n 'date=2021-01-01/18-ded53418-172b-4e40-bf2e-7c8142e71bd1-0.parquet']\n</code></pre> <p>Let\u2019s look at the content of the Delta table now that all the really small files have been removed from storage:</p> <pre><code>observation_data\n\u251c\u2500\u2500 _delta_log\n\u2502   \u251c\u2500\u2500 00000000000000000000.json\n\u2502   \u251c\u2500\u2500 00000000000000000001.json\n\u2502   \u251c\u2500\u2500 \u2026\n\u2502   \u251c\u2500\u2500 00000000000000000124.json\n\u2502   \u2514\u2500\u2500 00000000000000000125.json\n\u251c\u2500\u2500 
date=2021-01-01\n\u2502   \u2514\u2500\u2500 part-00000-31e3df5a-8bbe-425c-b85d-77794f922837-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-02\n\u2502   \u2514\u2500\u2500 part-00000-8af07878-b179-49ce-a900-d58595ffb60a-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-03\n\u2502   \u2514\u2500\u2500 part-00000-5e980864-b32f-4686-a58d-a75fae455c1e-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-04\n\u2502   \u2514\u2500\u2500 part-00000-1e82d23b-084d-47e3-9790-d68289c39837-c000.snappy.parquet\n\u251c\u2500\u2500 date=2021-01-05\n\u2502   \u2514\u2500\u2500 part-00000-4b020a40-c836-4a11-851f-4691370c9f3a-c000.snappy.parquet\n\u2514\u2500\u2500 date=2021-01-06\n    \u251c\u2500\u2500 121-0ecb5d70-4a28-4cd4-b2d2-89ee2285eaaa-0.parquet\n    \u251c\u2500\u2500 122-6b2d2758-9154-4392-b287-fe371ee507ec-0.parquet\n    \u251c\u2500\u2500 123-551d318f-4968-441f-83fc-89f98cd15daf-0.parquet\n    \u2514\u2500\u2500 124-287309d3-662e-449d-b4da-2e67b7cc0557-0.parquet\n</code></pre> <p>All the partitions only contain a single file now, except for the <code>date=2021-01-06</code> partition that has not been compacted yet.</p> <p>An entire partition won\u2019t necessarily get compacted to a single data file when optimize is run.  Each partition has data files that are condensed to the target file size.</p>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#what-causes-the-small-file-problem","title":"What causes the small file problem?","text":"<p>Delta tables can accumulate small files for a variety of reasons:</p> <ul> <li>User error: users can accidentally write files that are too small.  Users should sometimes repartition in memory before writing to disk to avoid appending files that are too small.</li> <li>Frequent appends: systems that append more often tend to append more smaller files.  
A pipeline that appends every minute will generally generate ten times as many small files compared to a system that appends every ten minutes.</li> <li>Appending to partitioned data lakes with high cardinality columns can also cause small files.  If you append every hour to a table that\u2019s partitioned on a column with 1,000 distinct values, then every append could create 1,000 new files.  Partitioning by date avoids this problem because the data isn\u2019t split up across partitions in this manner.  </li> </ul>"},{"location":"usage/optimize/small-file-compaction-with-optimize/#conclusion","title":"Conclusion","text":"<p>This page showed you how to create a Delta table with many small files, compact the small files into larger files with optimize, and remove the tombstoned files from storage with vacuum.</p> <p>You also learned about how to incrementally optimize partitioned Delta tables, so you only compact newly added data.</p> <p>An excessive number of small files slows down Delta table queries, so periodic compaction is important.  Make sure to properly maintain your Delta tables, so performance does not degrade over time.</p>"}]}
\ No newline at end of file
diff --git a/sitemap.xml b/sitemap.xml
index ed335d872d..1e7765a152 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,147 +2,152 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
          <loc>https://github.com/delta-io/delta-rs/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
+         <changefreq>daily</changefreq>
+    </url>
+    <url>
+         <loc>https://github.com/delta-io/delta-rs/why-use-delta-lake/</loc>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/catalog/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/delta_writer/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/exceptions/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/schema/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/storage/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/delta_table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/delta_table/delta_table_alterer/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/delta_table/delta_table_merger/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/delta_table/delta_table_optimizer/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/api/delta_table/metadata/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/how-delta-lake-works/architecture-of-delta-table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/integrations/delta-lake-arrow/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/integrations/delta-lake-datafusion/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/integrations/delta-lake-pandas/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/integrations/delta-lake-polars/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/appending-overwriting-delta-lake-table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/constraints/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/create-delta-lake-table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/deleting-rows-from-delta-lake-table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/examining-table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/installation/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/loading-table/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/managing-tables/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/overview/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/querying-delta-tables/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/writing-delta-tables/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/optimize/delta-lake-z-order/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
     <url>
          <loc>https://github.com/delta-io/delta-rs/usage/optimize/small-file-compaction-with-optimize/</loc>
-         <lastmod>2024-01-16</lastmod>
+         <lastmod>2024-01-18</lastmod>
          <changefreq>daily</changefreq>
     </url>
 </urlset>
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index b2cb099cca..b21cbdc20c 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ
diff --git a/usage/appending-overwriting-delta-lake-table/index.html b/usage/appending-overwriting-delta-lake-table/index.html
index 32a610b8fc..21384518f1 100644
--- a/usage/appending-overwriting-delta-lake-table/index.html
+++ b/usage/appending-overwriting-delta-lake-table/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1153,6 +1223,8 @@
       
         
       
+        
+      
     
     
     
@@ -1213,6 +1285,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/constraints/index.html b/usage/constraints/index.html
index 93fdab6e22..74c4469924 100644
--- a/usage/constraints/index.html
+++ b/usage/constraints/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1153,6 +1223,8 @@
       
         
       
+        
+      
     
     
     
@@ -1213,6 +1285,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/create-delta-lake-table/index.html b/usage/create-delta-lake-table/index.html
index 6c2d0c92a6..7cb65c1c2f 100644
--- a/usage/create-delta-lake-table/index.html
+++ b/usage/create-delta-lake-table/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
         
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1109,6 +1179,8 @@
       
         
       
+        
+      
     
     
     
@@ -1169,6 +1241,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
@@ -1322,20 +1414,33 @@
 <h1 id="creating-a-delta-lake-table">Creating a Delta Lake Table</h1>
 <p>This section explains how to create a Delta Lake table.</p>
 <p>You can easily write a DataFrame to a Delta table.</p>
+<div class="tabbed-set tabbed-alternate" data-tabs="1:2"><input checked="checked" id="__tabbed_1_1" name="__tabbed_1" type="radio" /><input id="__tabbed_1_2" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="__tabbed_1_1">pandas</label><label for="__tabbed_1_2">Polars</label></div>
+<div class="tabbed-content">
+<div class="tabbed-block">
 <div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">write_deltalake</span>
 </span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
 </span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a>
 </span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;num&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s2">&quot;letter&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">]})</span>
 </span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="n">write_deltalake</span><span class="p">(</span><span class="s2">&quot;tmp/some-table&quot;</span><span class="p">,</span> <span class="n">df</span><span class="p">)</span>
 </span></code></pre></div>
+</div>
+<div class="tabbed-block">
+<div class="language-python highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="kn">import</span> <span class="nn">polars</span> <span class="k">as</span> <span class="nn">pl</span>
+</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>
+</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pl</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;num&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s2">&quot;letter&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">]})</span>
+</span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="n">df</span><span class="o">.</span><span class="n">write_delta</span><span class="p">(</span><span class="s2">&quot;tmp/some-table&quot;</span><span class="p">)</span>
+</span></code></pre></div>
+</div>
+</div>
+</div>
 <p>Here are the contents of the Delta table in storage:</p>
-<div class="language-text highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>+-------+----------+
-</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>|   num | letter   |
-</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a>|-------+----------|
-</span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a>|     1 | a        |
-</span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a>|     2 | b        |
-</span><span id="__span-1-6"><a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a>|     3 | c        |
-</span><span id="__span-1-7"><a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a>+-------+----------+
+<div class="language-text highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>+-------+----------+
+</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a>|   num | letter   |
+</span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a>|-------+----------|
+</span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a>|     1 | a        |
+</span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a>|     2 | b        |
+</span><span id="__span-2-6"><a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a>|     3 | c        |
+</span><span id="__span-2-7"><a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a>+-------+----------+
 </span></code></pre></div>
 
 
diff --git a/usage/deleting-rows-from-delta-lake-table/index.html b/usage/deleting-rows-from-delta-lake-table/index.html
index 59b73b2170..2ce2a242b2 100644
--- a/usage/deleting-rows-from-delta-lake-table/index.html
+++ b/usage/deleting-rows-from-delta-lake-table/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1109,6 +1179,8 @@
       
         
       
+        
+      
     
     
     
@@ -1169,6 +1241,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/examining-table/index.html b/usage/examining-table/index.html
index e2470c43df..a8dc8f0f59 100644
--- a/usage/examining-table/index.html
+++ b/usage/examining-table/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
         
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1167,6 +1237,8 @@
       
         
       
+        
+      
     
     
     
@@ -1227,6 +1299,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
@@ -1469,33 +1561,35 @@ <h2 id="history">History</h2>
 <a href="https://docs.delta.io/latest/delta-utility.html#history-schema">https://docs.delta.io/latest/delta-utility.html#history-schema</a></p>
 </div>
 <p>To view the available history, use <code>DeltaTable.history</code>:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
-</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/simple_table&quot;</span><span class="p">)</span>
-</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">history</span><span class="p">()</span>
-</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="p">[{</span><span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="mi">1587968626537</span><span class="p">,</span> <span class="s1">&#39;operation&#39;</span><span class="p">:</span> <span class="s1">&#39;DELETE&#39;</span><span class="p">,</span> <span class="s1">&#39;operationParameters&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;predicate&#39;</span><span class="p">:</span> <span class="s1">&#39;[&quot;((`id` % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT))&quot;]&#39;</span><span class="p">},</span> <span class="s1">&#39;readVersion&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;isBlindAppend&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">},</span>
-</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a> <span class="p">{</span><span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="mi">1587968614187</span><span class="p">,</span> <span class="s1">&#39;operation&#39;</span><span class="p">:</span> <span class="s1">&#39;UPDATE&#39;</span><span class="p">,</span> <span class="s1">&#39;operationParameters&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;predicate&#39;</span><span class="p">:</span> <span class="s1">&#39;((id#697L </span><span class="si">% c</span><span class="s1">ast(2 as bigint)) = cast(0 as bigint))&#39;</span><span class="p">},</span> <span class="s1">&#39;readVersion&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;isBlindAppend&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">},</span>
-</span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a> <span class="p">{</span><span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="mi">1587968604143</span><span class="p">,</span> <span class="s1">&#39;operation&#39;</span><span class="p">:</span> <span class="s1">&#39;WRITE&#39;</span><span class="p">,</span> <span class="s1">&#39;operationParameters&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;mode&#39;</span><span class="p">:</span> <span class="s1">&#39;Overwrite&#39;</span><span class="p">,</span> <span class="s1">&#39;partitionBy&#39;</span><span class="p">:</span> <span class="s1">&#39;[]&#39;</span><span class="p">},</span> <span class="s1">&#39;readVersion&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;isBlindAppend&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">},</span>
-</span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a> <span class="p">{</span><span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="mi">1587968596254</span><span class="p">,</span> <span class="s1">&#39;operation&#39;</span><span class="p">:</span> <span class="s1">&#39;MERGE&#39;</span><span class="p">,</span> <span class="s1">&#39;operationParameters&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;predicate&#39;</span><span class="p">:</span> <span class="s1">&#39;(oldData.`id` = newData.`id`)&#39;</span><span class="p">},</span> <span class="s1">&#39;readVersion&#39;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;isBlindAppend&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">},</span>
-</span><span id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a> <span class="p">{</span><span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="mi">1587968586154</span><span class="p">,</span> <span class="s1">&#39;operation&#39;</span><span class="p">:</span> <span class="s1">&#39;WRITE&#39;</span><span class="p">,</span> <span class="s1">&#39;operationParameters&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;mode&#39;</span><span class="p">:</span> <span class="s1">&#39;ErrorIfExists&#39;</span><span class="p">,</span> <span class="s1">&#39;partitionBy&#39;</span><span class="p">:</span> <span class="s1">&#39;[]&#39;</span><span class="p">},</span> <span class="s1">&#39;isBlindAppend&#39;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}]</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
+</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a>
+</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/simple_table&quot;</span><span class="p">)</span>
+</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="n">dt</span><span class="o">.</span><span class="n">history</span><span class="p">()</span>
+</span></code></pre></div>
+<div class="language-text highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a>[{&#39;timestamp&#39;: 1587968626537, &#39;operation&#39;: &#39;DELETE&#39;, &#39;operationParameters&#39;: {&#39;predicate&#39;: &#39;[&quot;((`id` % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT))&quot;]&#39;}, &#39;readVersion&#39;: 3, &#39;isBlindAppend&#39;: False},
+</span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a> {&#39;timestamp&#39;: 1587968614187, &#39;operation&#39;: &#39;UPDATE&#39;, &#39;operationParameters&#39;: {&#39;predicate&#39;: &#39;((id#697L % cast(2 as bigint)) = cast(0 as bigint))&#39;}, &#39;readVersion&#39;: 2, &#39;isBlindAppend&#39;: False},
+</span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a> {&#39;timestamp&#39;: 1587968604143, &#39;operation&#39;: &#39;WRITE&#39;, &#39;operationParameters&#39;: {&#39;mode&#39;: &#39;Overwrite&#39;, &#39;partitionBy&#39;: &#39;[]&#39;}, &#39;readVersion&#39;: 1, &#39;isBlindAppend&#39;: False},
+</span><span id="__span-5-4"><a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a> {&#39;timestamp&#39;: 1587968596254, &#39;operation&#39;: &#39;MERGE&#39;, &#39;operationParameters&#39;: {&#39;predicate&#39;: &#39;(oldData.`id` = newData.`id`)&#39;}, &#39;readVersion&#39;: 0, &#39;isBlindAppend&#39;: False},
+</span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a> {&#39;timestamp&#39;: 1587968586154, &#39;operation&#39;: &#39;WRITE&#39;, &#39;operationParameters&#39;: {&#39;mode&#39;: &#39;ErrorIfExists&#39;, &#39;partitionBy&#39;: &#39;[]&#39;}, &#39;isBlindAppend&#39;: True}]
 </span></code></pre></div>
 <h2 id="current-add-actions">Current Add Actions</h2>
 <p>The active state for a delta table is determined by the Add actions,
 which provide the list of files that are part of the table and metadata
 about them, such as creation time, size, and statistics. You can get a
 data frame of the add actions data using <code>DeltaTable.get_add_actions</code>:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
-</span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.8.0&quot;</span><span class="p">)</span>
-</span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">get_add_actions</span><span class="p">(</span><span class="n">flatten</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
-</span><span id="__span-5-4"><a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a>                                                    <span class="n">path</span>  <span class="n">size_bytes</span>   <span class="n">modification_time</span>  <span class="n">data_change</span>  <span class="n">num_records</span>  <span class="n">null_count</span><span class="o">.</span><span class="n">value</span>  <span class="nb">min</span><span class="o">.</span><span class="n">value</span>  <span class="nb">max</span><span class="o">.</span><span class="n">value</span>
-</span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="mi">0</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00000</span><span class="o">-</span><span class="n">c9b90f86</span><span class="o">-</span><span class="mf">73e6</span><span class="o">-</span><span class="mi">46</span><span class="n">c8</span><span class="o">-</span><span class="mi">93</span><span class="n">ba</span><span class="o">-</span><span class="n">ff6bfaf892a</span><span class="o">...</span>         <span class="mi">440</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">07</span>         <span class="kc">True</span>            <span class="mi">2</span>                 <span class="mi">0</span>          <span class="mi">0</span>          <span class="mi">2</span>
-</span><span id="__span-5-6"><a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a><span class="mi">1</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00000</span><span class="o">-</span><span class="mi">04</span><span class="n">ec9591</span><span class="o">-</span><span class="mi">0</span><span class="n">b73</span><span class="o">-</span><span class="mf">459e-8</span><span class="n">d18</span><span class="o">-</span><span class="n">ba5711d6cbe</span><span class="o">...</span>         <span class="mi">440</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">16</span>         <span class="kc">True</span>            <span class="mi">2</span>                 <span class="mi">0</span>          <span class="mi">2</span>          <span class="mi">4</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
+</span><span id="__span-6-2"><a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.8.0&quot;</span><span class="p">)</span>
+</span><span id="__span-6-3"><a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">get_add_actions</span><span class="p">(</span><span class="n">flatten</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
+</span><span id="__span-6-4"><a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a>                                                    <span class="n">path</span>  <span class="n">size_bytes</span>   <span class="n">modification_time</span>  <span class="n">data_change</span>  <span class="n">num_records</span>  <span class="n">null_count</span><span class="o">.</span><span class="n">value</span>  <span class="nb">min</span><span class="o">.</span><span class="n">value</span>  <span class="nb">max</span><span class="o">.</span><span class="n">value</span>
+</span><span id="__span-6-5"><a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="mi">0</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00000</span><span class="o">-</span><span class="n">c9b90f86</span><span class="o">-</span><span class="mf">73e6</span><span class="o">-</span><span class="mi">46</span><span class="n">c8</span><span class="o">-</span><span class="mi">93</span><span class="n">ba</span><span class="o">-</span><span class="n">ff6bfaf892a</span><span class="o">...</span>         <span class="mi">440</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">07</span>         <span class="kc">True</span>            <span class="mi">2</span>                 <span class="mi">0</span>          <span class="mi">0</span>          <span class="mi">2</span>
+</span><span id="__span-6-6"><a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a><span class="mi">1</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00000</span><span class="o">-</span><span class="mi">04</span><span class="n">ec9591</span><span class="o">-</span><span class="mi">0</span><span class="n">b73</span><span class="o">-</span><span class="mf">459e-8</span><span class="n">d18</span><span class="o">-</span><span class="n">ba5711d6cbe</span><span class="o">...</span>         <span class="mi">440</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">16</span>         <span class="kc">True</span>            <span class="mi">2</span>                 <span class="mi">0</span>          <span class="mi">2</span>          <span class="mi">4</span>
 </span></code></pre></div>
 <p>This works even with past versions of the table:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.8.0&quot;</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
-</span><span id="__span-6-2"><a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">get_add_actions</span><span class="p">(</span><span class="n">flatten</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
-</span><span id="__span-6-3"><a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a>                                                <span class="n">path</span>  <span class="n">size_bytes</span>   <span class="n">modification_time</span>  <span class="n">data_change</span>  <span class="n">num_records</span>  <span class="n">null_count</span><span class="o">.</span><span class="n">value</span>  <span class="nb">min</span><span class="o">.</span><span class="n">value</span>  <span class="nb">max</span><span class="o">.</span><span class="n">value</span>
-</span><span id="__span-6-4"><a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a><span class="mi">0</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00000</span><span class="o">-</span><span class="n">c9b90f86</span><span class="o">-</span><span class="mf">73e6</span><span class="o">-</span><span class="mi">46</span><span class="n">c8</span><span class="o">-</span><span class="mi">93</span><span class="n">ba</span><span class="o">-</span><span class="n">ff6bfaf892a</span><span class="o">...</span>         <span class="mi">440</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">07</span>         <span class="kc">True</span>            <span class="mi">2</span>                 <span class="mi">0</span>          <span class="mi">0</span>          <span class="mi">2</span>
-</span><span id="__span-6-5"><a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="mi">1</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00001</span><span class="o">-</span><span class="mi">911</span><span class="n">a94a2</span><span class="o">-</span><span class="mi">43</span><span class="n">f6</span><span class="o">-</span><span class="mi">4</span><span class="n">acb</span><span class="o">-</span><span class="mi">8620</span><span class="o">-</span><span class="mf">5e68</span><span class="n">c265498</span><span class="o">...</span>         <span class="mi">445</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">07</span>         <span class="kc">True</span>            <span class="mi">3</span>                 <span class="mi">0</span>          <span class="mi">2</span>          <span class="mi">4</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.8.0&quot;</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+</span><span id="__span-7-2"><a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">get_add_actions</span><span class="p">(</span><span class="n">flatten</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span>
+</span><span id="__span-7-3"><a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a>                                                <span class="n">path</span>  <span class="n">size_bytes</span>   <span class="n">modification_time</span>  <span class="n">data_change</span>  <span class="n">num_records</span>  <span class="n">null_count</span><span class="o">.</span><span class="n">value</span>  <span class="nb">min</span><span class="o">.</span><span class="n">value</span>  <span class="nb">max</span><span class="o">.</span><span class="n">value</span>
+</span><span id="__span-7-4"><a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a><span class="mi">0</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00000</span><span class="o">-</span><span class="n">c9b90f86</span><span class="o">-</span><span class="mf">73e6</span><span class="o">-</span><span class="mi">46</span><span class="n">c8</span><span class="o">-</span><span class="mi">93</span><span class="n">ba</span><span class="o">-</span><span class="n">ff6bfaf892a</span><span class="o">...</span>         <span class="mi">440</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">07</span>         <span class="kc">True</span>            <span class="mi">2</span>                 <span class="mi">0</span>          <span class="mi">0</span>          <span class="mi">2</span>
+</span><span id="__span-7-5"><a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a><span class="mi">1</span>  <span class="n">part</span><span class="o">-</span><span class="mi">00001</span><span class="o">-</span><span class="mi">911</span><span class="n">a94a2</span><span class="o">-</span><span class="mi">43</span><span class="n">f6</span><span class="o">-</span><span class="mi">4</span><span class="n">acb</span><span class="o">-</span><span class="mi">8620</span><span class="o">-</span><span class="mf">5e68</span><span class="n">c265498</span><span class="o">...</span>         <span class="mi">445</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">06</span> <span class="mi">15</span><span class="p">:</span><span class="mi">16</span><span class="p">:</span><span class="mi">07</span>         <span class="kc">True</span>            <span class="mi">3</span>                 <span class="mi">0</span>          <span class="mi">2</span>          <span class="mi">4</span>
 </span></code></pre></div>
 
 
diff --git a/usage/installation/index.html b/usage/installation/index.html
index f6275fa886..dd862f3db0 100644
--- a/usage/installation/index.html
+++ b/usage/installation/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://github.com/delta-io/delta-rs/usage/installation/">
       
       
-        <link rel="prev" href="../..">
+        <link rel="prev" href="../../why-use-delta-lake/">
       
       
         <link rel="next" href="../overview/">
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1160,6 +1230,8 @@
       
         
       
+        
+      
     
     
     
@@ -1220,6 +1292,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
@@ -1457,7 +1549,7 @@ <h2 id="run-delta-lake-and-pandas-in-a-jupyter-notebook">Run Delta Lake and pand
       <nav class="md-footer__inner md-grid" aria-label="Footer" >
         
           
-          <a href="../.." class="md-footer__link md-footer__link--prev" aria-label="Previous: Home">
+          <a href="../../why-use-delta-lake/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Why Use Delta Lake">
             <div class="md-footer__button md-icon">
               
               <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
@@ -1467,7 +1559,7 @@ <h2 id="run-delta-lake-and-pandas-in-a-jupyter-notebook">Run Delta Lake and pand
                 Previous
               </span>
               <div class="md-ellipsis">
-                Home
+                Why Use Delta Lake
               </div>
             </div>
           </a>
diff --git a/usage/loading-table/index.html b/usage/loading-table/index.html
index 6481b0fde3..d17e5caad5 100644
--- a/usage/loading-table/index.html
+++ b/usage/loading-table/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
         
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1153,6 +1223,8 @@
       
         
       
+        
+      
     
     
     
@@ -1213,6 +1285,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
@@ -1386,9 +1478,30 @@
 
 
 <h1 id="loading-a-delta-table">Loading a Delta Table</h1>
-<p>To load the current version, use the constructor:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.2.0&quot;</span><span class="p">)</span>
-</span></code></pre></div>
+<p>A <a class="autorefs autorefs-internal" href="../../api/delta_table/#deltalake.DeltaTable">DeltaTable</a> represents the state of a
+Delta table at a particular version. This includes which files are
+currently part of the table, the schema of the table, and other metadata
+such as creation time.</p>
+<div class="tabbed-set tabbed-alternate" data-tabs="1:2"><input checked="checked" id="__tabbed_1_1" name="__tabbed_1" type="radio" /><input id="__tabbed_1_2" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="__tabbed_1_1"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M439.8 200.5c-7.7-30.9-22.3-54.2-53.4-54.2h-40.1v47.4c0 36.8-31.2 67.8-66.8 67.8H172.7c-29.2 0-53.4 25-53.4 54.3v101.8c0 29 25.2 46 53.4 54.3 33.8 9.9 66.3 11.7 106.8 0 26.9-7.8 53.4-23.5 53.4-54.3v-40.7H226.2v-13.6h160.2c31.1 0 42.6-21.7 53.4-54.2 11.2-33.5 10.7-65.7 0-108.6zM286.2 404c11.1 0 20.1 9.1 20.1 20.3 0 11.3-9 20.4-20.1 20.4-11 0-20.1-9.2-20.1-20.4.1-11.3 9.1-20.3 20.1-20.3zM167.8 248.1h106.8c29.7 0 53.4-24.5 53.4-54.3V91.9c0-29-24.4-50.7-53.4-55.6-35.8-5.9-74.7-5.6-106.8.1-45.2 8-53.4 24.7-53.4 55.6v40.7h106.9v13.6h-147c-31.1 0-58.3 18.7-66.8 54.2-9.8 40.7-10.2 66.1 0 108.6 7.6 31.6 25.7 54.2 56.8 54.2H101v-48.8c0-35.3 30.5-66.4 66.8-66.4zm-6.7-142.6c-11.1 0-20.1-9.1-20.1-20.3.1-11.3 9-20.4 20.1-20.4 11 0 20.1 9.2 20.1 20.4s-9 20.3-20.1 20.3z"/></svg></span> Python</label><label for="__tabbed_1_2"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! 
Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="m508.52 249.75-21.82-13.51c-.17-2-.34-3.93-.55-5.88l18.72-17.5a7.35 7.35 0 0 0-2.44-12.25l-24-9c-.54-1.88-1.08-3.78-1.67-5.64l15-20.83a7.35 7.35 0 0 0-4.79-11.54l-25.42-4.15c-.9-1.73-1.79-3.45-2.73-5.15l10.68-23.42a7.35 7.35 0 0 0-6.95-10.39l-25.82.91q-1.79-2.22-3.61-4.4L439 81.84a7.36 7.36 0 0 0-8.84-8.84L405 78.93q-2.17-1.83-4.4-3.61l.91-25.82a7.35 7.35 0 0 0-10.39-7L367.7 53.23c-1.7-.94-3.43-1.84-5.15-2.73l-4.15-25.42a7.35 7.35 0 0 0-11.54-4.79L326 35.26c-1.86-.59-3.75-1.13-5.64-1.67l-9-24a7.35 7.35 0 0 0-12.25-2.44l-17.5 18.72c-1.95-.21-3.91-.38-5.88-.55L262.25 3.48a7.35 7.35 0 0 0-12.5 0L236.24 25.3c-2 .17-3.93.34-5.88.55l-17.5-18.72a7.35 7.35 0 0 0-12.25 2.44l-9 24c-1.89.55-3.79 1.08-5.66 1.68l-20.82-15a7.35 7.35 0 0 0-11.54 4.79l-4.15 25.41c-1.73.9-3.45 1.79-5.16 2.73l-23.4-10.63a7.35 7.35 0 0 0-10.39 7l.92 25.81c-1.49 1.19-3 2.39-4.42 3.61L81.84 73A7.36 7.36 0 0 0 73 81.84L78.93 107c-1.23 1.45-2.43 2.93-3.62 4.41l-25.81-.91a7.42 7.42 0 0 0-6.37 3.26 7.35 7.35 0 0 0-.57 7.13l10.66 23.41c-.94 1.7-1.83 3.43-2.73 5.16l-25.41 4.14a7.35 7.35 0 0 0-4.79 11.54l15 20.82c-.59 1.87-1.13 3.77-1.68 5.66l-24 9a7.35 7.35 0 0 0-2.44 12.25l18.72 17.5c-.21 1.95-.38 3.91-.55 5.88l-21.86 13.5a7.35 7.35 0 0 0 0 12.5l21.82 13.51c.17 2 .34 3.92.55 5.87l-18.72 17.5a7.35 7.35 0 0 0 2.44 12.25l24 9c.55 1.89 1.08 3.78 1.68 5.65l-15 20.83a7.35 7.35 0 0 0 4.79 11.54l25.42 4.15c.9 1.72 1.79 3.45 2.73 5.14l-10.63 23.43a7.35 7.35 0 0 0 .57 7.13 7.13 7.13 0 0 0 6.37 3.26l25.83-.91q1.77 2.22 3.6 4.4L73 430.16a7.36 7.36 0 0 0 8.84 8.84l25.16-5.93q2.18 1.83 4.41 3.61l-.92 25.82a7.35 7.35 0 0 0 10.39 6.95l23.43-10.68c1.69.94 3.42 1.83 5.14 2.73l4.15 25.42a7.34 7.34 0 0 0 11.54 4.78l20.83-15c1.86.6 3.76 1.13 5.65 1.68l9 24a7.36 7.36 0 0 0 12.25 2.44l17.5-18.72c1.95.21 3.92.38 
5.88.55l13.51 21.82a7.35 7.35 0 0 0 12.5 0l13.51-21.82c2-.17 3.93-.34 5.88-.56l17.5 18.73a7.36 7.36 0 0 0 12.25-2.44l9-24c1.89-.55 3.78-1.08 5.65-1.68l20.82 15a7.34 7.34 0 0 0 11.54-4.78l4.15-25.42c1.72-.9 3.45-1.79 5.15-2.73l23.42 10.68a7.35 7.35 0 0 0 10.39-6.95l-.91-25.82q2.22-1.79 4.4-3.61l25.15 5.93a7.36 7.36 0 0 0 8.84-8.84L433.07 405q1.83-2.17 3.61-4.4l25.82.91a7.23 7.23 0 0 0 6.37-3.26 7.35 7.35 0 0 0 .58-7.13l-10.68-23.42c.94-1.7 1.83-3.43 2.73-5.15l25.42-4.15a7.35 7.35 0 0 0 4.79-11.54l-15-20.83c.59-1.87 1.13-3.76 1.67-5.65l24-9a7.35 7.35 0 0 0 2.44-12.25l-18.72-17.5c.21-1.95.38-3.91.55-5.87l21.82-13.51a7.35 7.35 0 0 0 0-12.5Zm-151 129.08A13.91 13.91 0 0 0 341 389.51l-7.64 35.67a187.51 187.51 0 0 1-156.36-.74l-7.64-35.66a13.87 13.87 0 0 0-16.46-10.68l-31.51 6.76a187.38 187.38 0 0 1-16.26-19.21H258.3c1.72 0 2.89-.29 2.89-1.91v-54.19c0-1.57-1.17-1.91-2.89-1.91h-44.83l.05-34.35H262c4.41 0 23.66 1.28 29.79 25.87 1.91 7.55 6.17 32.14 9.06 40 2.89 8.82 14.6 26.46 27.1 26.46H407a187.3 187.3 0 0 1-17.34 20.09Zm25.77 34.49A15.24 15.24 0 1 1 368 398.08h.44a15.23 15.23 0 0 1 14.8 15.24Zm-225.62-.68a15.24 15.24 0 1 1-15.25-15.25h.45a15.25 15.25 0 0 1 14.75 15.25Zm-88.1-178.49 32.83-14.6a13.88 13.88 0 0 0 7.06-18.33L102.69 186h26.56v119.73h-53.6a187.65 187.65 0 0 1-6.08-71.58Zm-11.26-36.06a15.24 15.24 0 0 1 15.23-15.25H74a15.24 15.24 0 1 1-15.67 15.24Zm155.16 24.49.05-35.32h63.26c3.28 0 23.07 3.77 23.07 18.62 0 12.29-15.19 16.7-27.68 16.7ZM399 306.71c-9.8 1.13-20.63-4.12-22-10.09-5.78-32.49-15.39-39.4-30.57-51.4 18.86-11.95 38.46-29.64 38.46-53.26 0-25.52-17.49-41.59-29.4-49.48-16.76-11-35.28-13.23-40.27-13.23h-198.9a187.49 187.49 0 0 1 104.89-59.19l23.47 24.6a13.82 13.82 0 0 0 19.6.44l26.26-25a187.51 187.51 0 0 1 128.37 91.43l-18 40.57a14 14 0 0 0 7.09 18.33l34.59 15.33a187.12 187.12 0 0 1 .4 32.54h-19.28c-1.91 0-2.69 1.27-2.69 3.13v8.82C421 301 409.31 305.58 399 306.71ZM240 60.21A15.24 15.24 0 0 1 255.21 45h.45A15.24 15.24 0 1 1 240 60.21ZM436.84 214a15.24 15.24 0 1 
1 0-30.48h.44a15.24 15.24 0 0 1-.44 30.48Z"/></svg></span> Rust</label></div>
+<div class="tabbed-content">
+<div class="tabbed-block">
+<p><a href="https://delta-io.github.io/delta-rs/api/delta_table/"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7 7H5a2 2 0 0 0-2 2v8h2v-4h2v4h2V9a2 2 0 0 0-2-2m0 4H5V9h2m7-2h-4v10h2v-4h2a2 2 0 0 0 2-2V9a2 2 0 0 0-2-2m0 4h-2V9h2m6 0v6h1v2h-4v-2h1V9h-1V7h4v2Z"/></svg></span>  <code>DeltaTable</code></a></p>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
+</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
+</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.2.0&quot;</span><span class="p">)</span>
+</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Version: </span><span class="si">{</span><span class="n">dt</span><span class="o">.</span><span class="n">version</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
+</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Files: </span><span class="si">{</span><span class="n">dt</span><span class="o">.</span><span class="n">files</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
+</span></code></pre></div>
+</div>
+<div class="tabbed-block">
+<p><a href="https://docs.rs/deltalake/latest/deltalake/table/struct.DeltaTable.html"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7 7H5a2 2 0 0 0-2 2v8h2v-4h2v4h2V9a2 2 0 0 0-2-2m0 4H5V9h2m7-2h-4v10h2v-4h2a2 2 0 0 0 2-2V9a2 2 0 0 0-2-2m0 4h-2V9h2m6 0v6h1v2h-4v-2h1V9h-1V7h4v2Z"/></svg></span>  <code>DeltaTable</code></a></p>
+<div class="language-rust highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="kd">let</span><span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">deltalake</span>::<span class="n">open_table</span><span class="p">(</span><span class="s">&quot;../rust/tests/data/simple_table&quot;</span><span class="p">).</span><span class="k">await</span><span class="p">.</span><span class="n">unwrap</span><span class="p">();</span>
+</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="fm">println!</span><span class="p">(</span><span class="s">&quot;Version: {}&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">table</span><span class="p">.</span><span class="n">version</span><span class="p">());</span>
+</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="fm">println!</span><span class="p">(</span><span class="s">&quot;Files: {:?}&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">table</span><span class="p">.</span><span class="n">get_files</span><span class="p">());</span>
+</span></code></pre></div>
+</div>
+</div>
+</div>
 <p>Depending on your storage backend, you could use the <code>storage_options</code>
 parameter to provide some configuration. Configuration is defined for
 specific backends - <a href="https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants">s3
@@ -1397,8 +1510,8 @@ <h1 id="loading-a-delta-table">Loading a Delta Table</h1>
 options</a>,
 <a href="https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants">gcs
 options</a>.</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">storage_options</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;AWS_ACCESS_KEY_ID&quot;</span><span class="p">:</span> <span class="s2">&quot;THE_AWS_ACCESS_KEY_ID&quot;</span><span class="p">,</span> <span class="s2">&quot;AWS_SECRET_ACCESS_KEY&quot;</span><span class="p">:</span><span class="s2">&quot;THE_AWS_SECRET_ACCESS_KEY&quot;</span><span class="p">}</span>
-</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.2.0&quot;</span><span class="p">,</span> <span class="n">storage_options</span><span class="o">=</span><span class="n">storage_options</span><span class="p">)</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">storage_options</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;AWS_ACCESS_KEY_ID&quot;</span><span class="p">:</span> <span class="s2">&quot;THE_AWS_ACCESS_KEY_ID&quot;</span><span class="p">,</span> <span class="s2">&quot;AWS_SECRET_ACCESS_KEY&quot;</span><span class="p">:</span><span class="s2">&quot;THE_AWS_SECRET_ACCESS_KEY&quot;</span><span class="p">}</span>
+</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.2.0&quot;</span><span class="p">,</span> <span class="n">storage_options</span><span class="o">=</span><span class="n">storage_options</span><span class="p">)</span>
 </span></code></pre></div>
 <p>The configuration can also be provided via the environment, and the
 basic service provider is derived from the URL being used. We try to
@@ -1428,14 +1541,14 @@ <h1 id="loading-a-delta-table">Loading a Delta Table</h1>
 <p>Alternatively, if you have a data catalog you can load it by reference
 to a database and table name. Currently only AWS Glue is supported.</p>
 <p>For AWS Glue catalog, use AWS environment variables to authenticate.</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
-</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DataCatalog</span>
-</span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">database_name</span> <span class="o">=</span> <span class="s2">&quot;simple_database&quot;</span>
-</span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">table_name</span> <span class="o">=</span> <span class="s2">&quot;simple_table&quot;</span>
-</span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">data_catalog</span> <span class="o">=</span> <span class="n">DataCatalog</span><span class="o">.</span><span class="n">AWS</span>
-</span><span id="__span-2-6"><a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="o">.</span><span class="n">from_data_catalog</span><span class="p">(</span><span class="n">data_catalog</span><span class="o">=</span><span class="n">data_catalog</span><span class="p">,</span> <span class="n">database_name</span><span class="o">=</span><span class="n">database_name</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">table_name</span><span class="p">)</span>
-</span><span id="__span-2-7"><a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">to_pyarrow_table</span><span class="p">()</span><span class="o">.</span><span class="n">to_pydict</span><span class="p">()</span>
-</span><span id="__span-2-8"><a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a><span class="p">{</span><span class="s1">&#39;id&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">]}</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
+</span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DataCatalog</span>
+</span><span id="__span-3-3"><a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">database_name</span> <span class="o">=</span> <span class="s2">&quot;simple_database&quot;</span>
+</span><span id="__span-3-4"><a id="__codelineno-3-4" name="__codelineno-3-4" href="#__codelineno-3-4"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">table_name</span> <span class="o">=</span> <span class="s2">&quot;simple_table&quot;</span>
+</span><span id="__span-3-5"><a id="__codelineno-3-5" name="__codelineno-3-5" href="#__codelineno-3-5"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">data_catalog</span> <span class="o">=</span> <span class="n">DataCatalog</span><span class="o">.</span><span class="n">AWS</span>
+</span><span id="__span-3-6"><a id="__codelineno-3-6" name="__codelineno-3-6" href="#__codelineno-3-6"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="o">.</span><span class="n">from_data_catalog</span><span class="p">(</span><span class="n">data_catalog</span><span class="o">=</span><span class="n">data_catalog</span><span class="p">,</span> <span class="n">database_name</span><span class="o">=</span><span class="n">database_name</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">table_name</span><span class="p">)</span>
+</span><span id="__span-3-7"><a id="__codelineno-3-7" name="__codelineno-3-7" href="#__codelineno-3-7"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">to_pyarrow_table</span><span class="p">()</span><span class="o">.</span><span class="n">to_pydict</span><span class="p">()</span>
+</span><span id="__span-3-8"><a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a><span class="p">{</span><span class="s1">&#39;id&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">]}</span>
 </span></code></pre></div>
 <h2 id="custom-storage-backends">Custom Storage Backends</h2>
 <p>While delta always needs its internal storage backend to work and be
@@ -1443,37 +1556,37 @@ <h2 id="custom-storage-backends">Custom Storage Backends</h2>
 be advantageous - and is common practice in the arrow world - to
 customize the storage interface used for reading the bulk data.</p>
 <p><code>deltalake</code> will work with any storage compliant with <code>pyarrow.fs.FileSystem</code>, however the root of the filesystem has to be adjusted to point at the root of the Delta table. We can achieve this by wrapping the custom filesystem into a <code>pyarrow.fs.SubTreeFileSystem</code>.</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="kn">import</span> <span class="nn">pyarrow.fs</span> <span class="k">as</span> <span class="nn">fs</span>
-</span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
-</span><span id="__span-3-3"><a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a>
-</span><span id="__span-3-4"><a id="__codelineno-3-4" name="__codelineno-3-4" href="#__codelineno-3-4"></a><span class="n">path</span> <span class="o">=</span> <span class="s2">&quot;&lt;path/to/table&gt;&quot;</span>
-</span><span id="__span-3-5"><a id="__codelineno-3-5" name="__codelineno-3-5" href="#__codelineno-3-5"></a><span class="n">filesystem</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">SubTreeFileSystem</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">fs</span><span class="o">.</span><span class="n">LocalFileSystem</span><span class="p">())</span>
-</span><span id="__span-3-6"><a id="__codelineno-3-6" name="__codelineno-3-6" href="#__codelineno-3-6"></a>
-</span><span id="__span-3-7"><a id="__codelineno-3-7" name="__codelineno-3-7" href="#__codelineno-3-7"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
-</span><span id="__span-3-8"><a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a><span class="n">ds</span> <span class="o">=</span> <span class="n">dt</span><span class="o">.</span><span class="n">to_pyarrow_dataset</span><span class="p">(</span><span class="n">filesystem</span><span class="o">=</span><span class="n">filesystem</span><span class="p">)</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">import</span> <span class="nn">pyarrow.fs</span> <span class="k">as</span> <span class="nn">fs</span>
+</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
+</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a>
+</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="n">path</span> <span class="o">=</span> <span class="s2">&quot;&lt;path/to/table&gt;&quot;</span>
+</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a><span class="n">filesystem</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">SubTreeFileSystem</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">fs</span><span class="o">.</span><span class="n">LocalFileSystem</span><span class="p">())</span>
+</span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a>
+</span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+</span><span id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a><span class="n">ds</span> <span class="o">=</span> <span class="n">dt</span><span class="o">.</span><span class="n">to_pyarrow_dataset</span><span class="p">(</span><span class="n">filesystem</span><span class="o">=</span><span class="n">filesystem</span><span class="p">)</span>
 </span></code></pre></div>
 <p>When using the pyarrow factory method for file systems, the normalized
 path is provided on creation. In case of S3 this would look something
 like:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">import</span> <span class="nn">pyarrow.fs</span> <span class="k">as</span> <span class="nn">fs</span>
-</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
-</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a>
-</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a><span class="n">table_uri</span> <span class="o">=</span> <span class="s2">&quot;s3://&lt;bucket&gt;/&lt;path&gt;&quot;</span>
-</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a><span class="n">raw_fs</span><span class="p">,</span> <span class="n">normalized_path</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">from_uri</span><span class="p">(</span><span class="n">table_uri</span><span class="p">)</span>
-</span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a><span class="n">filesystem</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">SubTreeFileSystem</span><span class="p">(</span><span class="n">normalized_path</span><span class="p">,</span> <span class="n">raw_fs</span><span class="p">)</span>
-</span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a>
-</span><span id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="n">table_uri</span><span class="p">)</span>
-</span><span id="__span-4-9"><a id="__codelineno-4-9" name="__codelineno-4-9" href="#__codelineno-4-9"></a><span class="n">ds</span> <span class="o">=</span> <span class="n">dt</span><span class="o">.</span><span class="n">to_pyarrow_dataset</span><span class="p">(</span><span class="n">filesystem</span><span class="o">=</span><span class="n">filesystem</span><span class="p">)</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="kn">import</span> <span class="nn">pyarrow.fs</span> <span class="k">as</span> <span class="nn">fs</span>
+</span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
+</span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a>
+</span><span id="__span-5-4"><a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="n">table_uri</span> <span class="o">=</span> <span class="s2">&quot;s3://&lt;bucket&gt;/&lt;path&gt;&quot;</span>
+</span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="n">raw_fs</span><span class="p">,</span> <span class="n">normalized_path</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">from_uri</span><span class="p">(</span><span class="n">table_uri</span><span class="p">)</span>
+</span><span id="__span-5-6"><a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a><span class="n">filesystem</span> <span class="o">=</span> <span class="n">fs</span><span class="o">.</span><span class="n">SubTreeFileSystem</span><span class="p">(</span><span class="n">normalized_path</span><span class="p">,</span> <span class="n">raw_fs</span><span class="p">)</span>
+</span><span id="__span-5-7"><a id="__codelineno-5-7" name="__codelineno-5-7" href="#__codelineno-5-7"></a>
+</span><span id="__span-5-8"><a id="__codelineno-5-8" name="__codelineno-5-8" href="#__codelineno-5-8"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="n">table_uri</span><span class="p">)</span>
+</span><span id="__span-5-9"><a id="__codelineno-5-9" name="__codelineno-5-9" href="#__codelineno-5-9"></a><span class="n">ds</span> <span class="o">=</span> <span class="n">dt</span><span class="o">.</span><span class="n">to_pyarrow_dataset</span><span class="p">(</span><span class="n">filesystem</span><span class="o">=</span><span class="n">filesystem</span><span class="p">)</span>
 </span></code></pre></div>
 <h2 id="time-travel">Time Travel</h2>
 <p>To load previous table states, you can provide the version number you
 wish to load:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/simple_table&quot;</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/simple_table&quot;</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
 </span></code></pre></div>
 <p>Once you\'ve loaded a table, you can also change versions using either a
 version number or datetime string:</p>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">load_version</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
-</span><span id="__span-6-2"><a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">load_with_datetime</span><span class="p">(</span><span class="s2">&quot;2021-11-04 00:05:23.283+00:00&quot;</span><span class="p">)</span>
+<div class="language-python highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">load_version</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
+</span><span id="__span-7-2"><a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="o">&gt;&gt;&gt;</span> <span class="n">dt</span><span class="o">.</span><span class="n">load_with_datetime</span><span class="p">(</span><span class="s2">&quot;2021-11-04 00:05:23.283+00:00&quot;</span><span class="p">)</span>
 </span></code></pre></div>
 <div class="admonition warning">
 <p class="admonition-title">Warning</p>
diff --git a/usage/managing-tables/index.html b/usage/managing-tables/index.html
index 2c9ffabe1f..811052d509 100644
--- a/usage/managing-tables/index.html
+++ b/usage/managing-tables/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1153,6 +1223,8 @@
       
         
       
+        
+      
     
     
     
@@ -1213,6 +1285,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/optimize/delta-lake-z-order/index.html b/usage/optimize/delta-lake-z-order/index.html
index a12306247d..f06f9826e0 100644
--- a/usage/optimize/delta-lake-z-order/index.html
+++ b/usage/optimize/delta-lake-z-order/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1109,6 +1179,8 @@
       
         
       
+        
+      
     
     
     
@@ -1169,6 +1241,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/optimize/small-file-compaction-with-optimize/index.html b/usage/optimize/small-file-compaction-with-optimize/index.html
index cca1736cc2..90153e3cbf 100644
--- a/usage/optimize/small-file-compaction-with-optimize/index.html
+++ b/usage/optimize/small-file-compaction-with-optimize/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1181,6 +1251,8 @@
       
         
       
+        
+      
     
     
     
@@ -1241,6 +1313,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/overview/index.html b/usage/overview/index.html
index 0f0f60b9d6..6566fe7e51 100644
--- a/usage/overview/index.html
+++ b/usage/overview/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
         
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1109,6 +1179,8 @@
       
         
       
+        
+      
     
     
     
@@ -1169,6 +1241,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
@@ -1320,30 +1412,9 @@
 
 
 <h1 id="usage">Usage</h1>
-<p>A <a class="autorefs autorefs-internal" href="../../api/delta_table/#deltalake.DeltaTable">DeltaTable</a> represents the state of a
-delta table at a particular version. This includes which files are
-currently part of the table, the schema of the table, and other metadata
-such as creation time.</p>
-<div class="tabbed-set tabbed-alternate" data-tabs="1:2"><input checked="checked" id="__tabbed_1_1" name="__tabbed_1" type="radio" /><input id="__tabbed_1_2" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="__tabbed_1_1"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M439.8 200.5c-7.7-30.9-22.3-54.2-53.4-54.2h-40.1v47.4c0 36.8-31.2 67.8-66.8 67.8H172.7c-29.2 0-53.4 25-53.4 54.3v101.8c0 29 25.2 46 53.4 54.3 33.8 9.9 66.3 11.7 106.8 0 26.9-7.8 53.4-23.5 53.4-54.3v-40.7H226.2v-13.6h160.2c31.1 0 42.6-21.7 53.4-54.2 11.2-33.5 10.7-65.7 0-108.6zM286.2 404c11.1 0 20.1 9.1 20.1 20.3 0 11.3-9 20.4-20.1 20.4-11 0-20.1-9.2-20.1-20.4.1-11.3 9.1-20.3 20.1-20.3zM167.8 248.1h106.8c29.7 0 53.4-24.5 53.4-54.3V91.9c0-29-24.4-50.7-53.4-55.6-35.8-5.9-74.7-5.6-106.8.1-45.2 8-53.4 24.7-53.4 55.6v40.7h106.9v13.6h-147c-31.1 0-58.3 18.7-66.8 54.2-9.8 40.7-10.2 66.1 0 108.6 7.6 31.6 25.7 54.2 56.8 54.2H101v-48.8c0-35.3 30.5-66.4 66.8-66.4zm-6.7-142.6c-11.1 0-20.1-9.1-20.1-20.3.1-11.3 9-20.4 20.1-20.4 11 0 20.1 9.2 20.1 20.4s-9 20.3-20.1 20.3z"/></svg></span> Python</label><label for="__tabbed_1_2"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! 
Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="m508.52 249.75-21.82-13.51c-.17-2-.34-3.93-.55-5.88l18.72-17.5a7.35 7.35 0 0 0-2.44-12.25l-24-9c-.54-1.88-1.08-3.78-1.67-5.64l15-20.83a7.35 7.35 0 0 0-4.79-11.54l-25.42-4.15c-.9-1.73-1.79-3.45-2.73-5.15l10.68-23.42a7.35 7.35 0 0 0-6.95-10.39l-25.82.91q-1.79-2.22-3.61-4.4L439 81.84a7.36 7.36 0 0 0-8.84-8.84L405 78.93q-2.17-1.83-4.4-3.61l.91-25.82a7.35 7.35 0 0 0-10.39-7L367.7 53.23c-1.7-.94-3.43-1.84-5.15-2.73l-4.15-25.42a7.35 7.35 0 0 0-11.54-4.79L326 35.26c-1.86-.59-3.75-1.13-5.64-1.67l-9-24a7.35 7.35 0 0 0-12.25-2.44l-17.5 18.72c-1.95-.21-3.91-.38-5.88-.55L262.25 3.48a7.35 7.35 0 0 0-12.5 0L236.24 25.3c-2 .17-3.93.34-5.88.55l-17.5-18.72a7.35 7.35 0 0 0-12.25 2.44l-9 24c-1.89.55-3.79 1.08-5.66 1.68l-20.82-15a7.35 7.35 0 0 0-11.54 4.79l-4.15 25.41c-1.73.9-3.45 1.79-5.16 2.73l-23.4-10.63a7.35 7.35 0 0 0-10.39 7l.92 25.81c-1.49 1.19-3 2.39-4.42 3.61L81.84 73A7.36 7.36 0 0 0 73 81.84L78.93 107c-1.23 1.45-2.43 2.93-3.62 4.41l-25.81-.91a7.42 7.42 0 0 0-6.37 3.26 7.35 7.35 0 0 0-.57 7.13l10.66 23.41c-.94 1.7-1.83 3.43-2.73 5.16l-25.41 4.14a7.35 7.35 0 0 0-4.79 11.54l15 20.82c-.59 1.87-1.13 3.77-1.68 5.66l-24 9a7.35 7.35 0 0 0-2.44 12.25l18.72 17.5c-.21 1.95-.38 3.91-.55 5.88l-21.86 13.5a7.35 7.35 0 0 0 0 12.5l21.82 13.51c.17 2 .34 3.92.55 5.87l-18.72 17.5a7.35 7.35 0 0 0 2.44 12.25l24 9c.55 1.89 1.08 3.78 1.68 5.65l-15 20.83a7.35 7.35 0 0 0 4.79 11.54l25.42 4.15c.9 1.72 1.79 3.45 2.73 5.14l-10.63 23.43a7.35 7.35 0 0 0 .57 7.13 7.13 7.13 0 0 0 6.37 3.26l25.83-.91q1.77 2.22 3.6 4.4L73 430.16a7.36 7.36 0 0 0 8.84 8.84l25.16-5.93q2.18 1.83 4.41 3.61l-.92 25.82a7.35 7.35 0 0 0 10.39 6.95l23.43-10.68c1.69.94 3.42 1.83 5.14 2.73l4.15 25.42a7.34 7.34 0 0 0 11.54 4.78l20.83-15c1.86.6 3.76 1.13 5.65 1.68l9 24a7.36 7.36 0 0 0 12.25 2.44l17.5-18.72c1.95.21 3.92.38 
5.88.55l13.51 21.82a7.35 7.35 0 0 0 12.5 0l13.51-21.82c2-.17 3.93-.34 5.88-.56l17.5 18.73a7.36 7.36 0 0 0 12.25-2.44l9-24c1.89-.55 3.78-1.08 5.65-1.68l20.82 15a7.34 7.34 0 0 0 11.54-4.78l4.15-25.42c1.72-.9 3.45-1.79 5.15-2.73l23.42 10.68a7.35 7.35 0 0 0 10.39-6.95l-.91-25.82q2.22-1.79 4.4-3.61l25.15 5.93a7.36 7.36 0 0 0 8.84-8.84L433.07 405q1.83-2.17 3.61-4.4l25.82.91a7.23 7.23 0 0 0 6.37-3.26 7.35 7.35 0 0 0 .58-7.13l-10.68-23.42c.94-1.7 1.83-3.43 2.73-5.15l25.42-4.15a7.35 7.35 0 0 0 4.79-11.54l-15-20.83c.59-1.87 1.13-3.76 1.67-5.65l24-9a7.35 7.35 0 0 0 2.44-12.25l-18.72-17.5c.21-1.95.38-3.91.55-5.87l21.82-13.51a7.35 7.35 0 0 0 0-12.5Zm-151 129.08A13.91 13.91 0 0 0 341 389.51l-7.64 35.67a187.51 187.51 0 0 1-156.36-.74l-7.64-35.66a13.87 13.87 0 0 0-16.46-10.68l-31.51 6.76a187.38 187.38 0 0 1-16.26-19.21H258.3c1.72 0 2.89-.29 2.89-1.91v-54.19c0-1.57-1.17-1.91-2.89-1.91h-44.83l.05-34.35H262c4.41 0 23.66 1.28 29.79 25.87 1.91 7.55 6.17 32.14 9.06 40 2.89 8.82 14.6 26.46 27.1 26.46H407a187.3 187.3 0 0 1-17.34 20.09Zm25.77 34.49A15.24 15.24 0 1 1 368 398.08h.44a15.23 15.23 0 0 1 14.8 15.24Zm-225.62-.68a15.24 15.24 0 1 1-15.25-15.25h.45a15.25 15.25 0 0 1 14.75 15.25Zm-88.1-178.49 32.83-14.6a13.88 13.88 0 0 0 7.06-18.33L102.69 186h26.56v119.73h-53.6a187.65 187.65 0 0 1-6.08-71.58Zm-11.26-36.06a15.24 15.24 0 0 1 15.23-15.25H74a15.24 15.24 0 1 1-15.67 15.24Zm155.16 24.49.05-35.32h63.26c3.28 0 23.07 3.77 23.07 18.62 0 12.29-15.19 16.7-27.68 16.7ZM399 306.71c-9.8 1.13-20.63-4.12-22-10.09-5.78-32.49-15.39-39.4-30.57-51.4 18.86-11.95 38.46-29.64 38.46-53.26 0-25.52-17.49-41.59-29.4-49.48-16.76-11-35.28-13.23-40.27-13.23h-198.9a187.49 187.49 0 0 1 104.89-59.19l23.47 24.6a13.82 13.82 0 0 0 19.6.44l26.26-25a187.51 187.51 0 0 1 128.37 91.43l-18 40.57a14 14 0 0 0 7.09 18.33l34.59 15.33a187.12 187.12 0 0 1 .4 32.54h-19.28c-1.91 0-2.69 1.27-2.69 3.13v8.82C421 301 409.31 305.58 399 306.71ZM240 60.21A15.24 15.24 0 0 1 255.21 45h.45A15.24 15.24 0 1 1 240 60.21ZM436.84 214a15.24 15.24 0 1 
1 0-30.48h.44a15.24 15.24 0 0 1-.44 30.48Z"/></svg></span> Rust</label></div>
-<div class="tabbed-content">
-<div class="tabbed-block">
-<p><a href="https://delta-io.github.io/delta-rs/api//delta_table"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7 7H5a2 2 0 0 0-2 2v8h2v-4h2v4h2V9a2 2 0 0 0-2-2m0 4H5V9h2m7-2h-4v10h2v-4h2a2 2 0 0 0 2-2V9a2 2 0 0 0-2-2m0 4h-2V9h2m6 0v6h1v2h-4v-2h1V9h-1V7h4v2Z"/></svg></span>  <code>DeltaTable</code></a>
-<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">from</span> <span class="nn">deltalake</span> <span class="kn">import</span> <span class="n">DeltaTable</span>
-</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a>
-</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="n">dt</span> <span class="o">=</span> <span class="n">DeltaTable</span><span class="p">(</span><span class="s2">&quot;../rust/tests/data/delta-0.2.0&quot;</span><span class="p">)</span>
-</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Version: </span><span class="si">{</span><span class="n">dt</span><span class="o">.</span><span class="n">version</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
-</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Files: </span><span class="si">{</span><span class="n">dt</span><span class="o">.</span><span class="n">files</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
-</span></code></pre></div></p>
-</div>
-<div class="tabbed-block">
-<p><a href="https://docs.rs/deltalake/latest/deltalake/table/struct.DeltaTable.html"><span class="twemoji"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M7 7H5a2 2 0 0 0-2 2v8h2v-4h2v4h2V9a2 2 0 0 0-2-2m0 4H5V9h2m7-2h-4v10h2v-4h2a2 2 0 0 0 2-2V9a2 2 0 0 0-2-2m0 4h-2V9h2m6 0v6h1v2h-4v-2h1V9h-1V7h4v2Z"/></svg></span>  <code>DeltaTable</code></a>
-<div class="language-rust highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="kd">let</span><span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">deltalake</span>::<span class="n">open_table</span><span class="p">(</span><span class="s">&quot;../rust/tests/data/simple_table&quot;</span><span class="p">).</span><span class="k">await</span><span class="p">.</span><span class="n">unwrap</span><span class="p">();</span>
-</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="fm">println!</span><span class="p">(</span><span class="s">&quot;Version: {}&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">table</span><span class="p">.</span><span class="n">version</span><span class="p">());</span>
-</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="fm">println!</span><span class="p">(</span><span class="s">&quot;Files: {}&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">table</span><span class="p">.</span><span class="n">get_files</span><span class="p">());</span>
-</span></code></pre></div></p>
-</div>
-</div>
-</div>
+<p>This guide teaches you how to use Delta Lake.  You will learn how to create Delta tables, run queries, perform DML operations, and optimize your tables.</p>
+<p>It's easy to use Delta Lake with pandas, Polars, Rust, or any other PyArrow-like DataFrame library.</p>
+<p>See the <a href="https://docs.delta.io/latest/index.html">Spark Delta Lake documentation</a> if you're using Delta Lake with Spark.</p>
 
 
 
diff --git a/usage/querying-delta-tables/index.html b/usage/querying-delta-tables/index.html
index 9ced6fe163..fdcdd484d6 100644
--- a/usage/querying-delta-tables/index.html
+++ b/usage/querying-delta-tables/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1109,6 +1179,8 @@
       
         
       
+        
+      
     
     
     
@@ -1169,6 +1241,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/usage/writing-delta-tables/index.html b/usage/writing-delta-tables/index.html
index bd7f8ae6df..e9eb2e91ac 100644
--- a/usage/writing-delta-tables/index.html
+++ b/usage/writing-delta-tables/index.html
@@ -223,16 +223,19 @@
   
   
   
-    <li class="md-tabs__item">
-      <a href="../.." class="md-tabs__link">
-        
+    
+    
+      <li class="md-tabs__item">
+        <a href="../.." class="md-tabs__link">
+          
   
     
   
   Home
 
-      </a>
-    </li>
+        </a>
+      </li>
+    
   
 
       
@@ -373,19 +376,86 @@
   
   
   
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
         
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_1" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../.." class="md-nav__link ">
+              
   
   <span class="md-ellipsis">
     Home
   </span>
   
 
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../why-use-delta-lake/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
       </a>
     </li>
   
 
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
     
       
       
@@ -1146,6 +1216,8 @@
       
         
       
+        
+      
     
     
     
@@ -1206,6 +1278,26 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../integrations/delta-lake-pandas/" class="md-nav__link">
         
diff --git a/why-use-delta-lake/index.html b/why-use-delta-lake/index.html
new file mode 100644
index 0000000000..6e8c916a15
--- /dev/null
+++ b/why-use-delta-lake/index.html
@@ -0,0 +1,1704 @@
+
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    
+      <meta charset="utf-8">
+      <meta name="viewport" content="width=device-width,initial-scale=1">
+      
+      
+      
+        <link rel="canonical" href="https://delta-io.github.io/delta-rs/why-use-delta-lake/">
+      
+      
+        <link rel="prev" href="..">
+      
+      
+        <link rel="next" href="../usage/installation/">
+      
+      
+      <link rel="icon" href="../delta-rust-no-whitespace.svg">
+      <meta name="generator" content="mkdocs-1.5.3, mkdocs-material-9.4.5">
+    
+    
+      
+        <title>Why Use Delta Lake - Delta Lake Documentation</title>
+      
+    
+    
+      <link rel="stylesheet" href="../assets/stylesheets/main.6a10b989.min.css">
+      
+        
+        <link rel="stylesheet" href="../assets/stylesheets/palette.356b1318.min.css">
+      
+      
+
+
+    
+    
+      
+    
+    
+      
+        
+        
+        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
+        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+      
+    
+    
+      <link rel="stylesheet" href="../assets/_markdown_exec_ansi.css">
+    
+      <link rel="stylesheet" href="../assets/_mkdocstrings.css">
+    
+    <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    
+      
+
+    
+    
+    
+  </head>
+  
+  
+    
+    
+      
+    
+    
+    
+    
+    <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
+  
+    
+    
+      <script>var palette=__md_get("__palette");if(palette&&"object"==typeof palette.color)for(var key of Object.keys(palette.color))document.body.setAttribute("data-md-color-"+key,palette.color[key])</script>
+    
+    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
+    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
+    <label class="md-overlay" for="__drawer"></label>
+    <div data-md-component="skip">
+      
+        
+        <a href="#why-use-delta-lake" class="md-skip">
+          Skip to content
+        </a>
+      
+    </div>
+    <div data-md-component="announce">
+      
+    </div>
+    
+    
+      
+
+  
+
+<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
+  <nav class="md-header__inner md-grid" aria-label="Header">
+    <a href=".." title="Delta Lake Documentation" class="md-header__button md-logo" aria-label="Delta Lake Documentation" data-md-component="logo">
+      
+  <img src="../delta-rust-no-whitespace.svg" alt="logo">
+
+    </a>
+    <label class="md-header__button md-icon" for="__drawer">
+      
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
+    </label>
+    <div class="md-header__title" data-md-component="header-title">
+      <div class="md-header__ellipsis">
+        <div class="md-header__topic">
+          <span class="md-ellipsis">
+            Delta Lake Documentation
+          </span>
+        </div>
+        <div class="md-header__topic" data-md-component="header-topic">
+          <span class="md-ellipsis">
+            
+              Why Use Delta Lake
+            
+          </span>
+        </div>
+      </div>
+    </div>
+    
+      
+        <form class="md-header__option" data-md-component="palette">
+  
+    
+    
+    
+    <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo"  aria-label="Switch to light mode"  type="radio" name="__palette" id="__palette_1">
+    
+      <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_3" hidden>
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9h-1.9M20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69m-9.15 3.96h2.3L12 9l-1.15 3.65Z"/></svg>
+      </label>
+    
+  
+    
+    
+    
+    <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo"  aria-label="Switch to dark mode"  type="radio" name="__palette" id="__palette_2">
+    
+      <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg>
+      </label>
+    
+  
+    
+    
+    
+    <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo"  aria-label="Switch to system preference"  type="radio" name="__palette" id="__palette_3">
+    
+      <label class="md-header__button md-icon" title="Switch to system preference" for="__palette_2" hidden>
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12c0-2.42-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg>
+      </label>
+    
+  
+</form>
+      
+    
+    
+    
+      <label class="md-header__button md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
+      </label>
+      <div class="md-search" data-md-component="search" role="dialog">
+  <label class="md-search__overlay" for="__search"></label>
+  <div class="md-search__inner" role="search">
+    <form class="md-search__form" name="search">
+      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
+      <label class="md-search__icon md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
+      </label>
+      <nav class="md-search__options" aria-label="Search">
+        
+        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
+          
+          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
+        </button>
+      </nav>
+      
+    </form>
+    <div class="md-search__output">
+      <div class="md-search__scrollwrap" data-md-scrollfix>
+        <div class="md-search-result" data-md-component="search-result">
+          <div class="md-search-result__meta">
+            Initializing search
+          </div>
+          <ol class="md-search-result__list" role="presentation"></ol>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
+    
+    
+      <div class="md-header__source">
+        <a href="https://github.com/delta-io/delta-rs" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81z"/></svg>
+  </div>
+  <div class="md-source__repository">
+    delta-io/delta-rs
+  </div>
+</a>
+      </div>
+    
+  </nav>
+  
+    
+      
+<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
+  <div class="md-grid">
+    <ul class="md-tabs__list">
+      
+        
+  
+  
+    
+  
+  
+    
+    
+      <li class="md-tabs__item md-tabs__item--active">
+        <a href=".." class="md-tabs__link">
+          
+  
+    
+  
+  Home
+
+        </a>
+      </li>
+    
+  
+
+      
+        
+  
+  
+  
+    
+    
+      <li class="md-tabs__item">
+        <a href="../usage/installation/" class="md-tabs__link">
+          
+  
+    
+  
+  Usage
+
+        </a>
+      </li>
+    
+  
+
+      
+        
+  
+  
+  
+    
+    
+      <li class="md-tabs__item">
+        <a href="../api/delta_writer/" class="md-tabs__link">
+          
+  
+    
+  
+  API Reference
+
+        </a>
+      </li>
+    
+  
+
+      
+        
+  
+  
+  
+    
+    
+      <li class="md-tabs__item">
+        <a href="../integrations/delta-lake-arrow/" class="md-tabs__link">
+          
+  
+    
+  
+  Integrations
+
+        </a>
+      </li>
+    
+  
+
+      
+        
+  
+  
+  
+    
+    
+      <li class="md-tabs__item">
+        <a href="../how-delta-lake-works/architecture-of-delta-table/" class="md-tabs__link">
+          
+  
+    
+  
+  How Delta Lake works
+
+        </a>
+      </li>
+    
+  
+
+      
+    </ul>
+  </div>
+</nav>
+    
+  
+</header>
+    
+    <div class="md-container" data-md-component="container">
+      
+      
+        
+      
+      <main class="md-main" data-md-component="main">
+        <div class="md-main__inner md-grid">
+          
+            
+              
+              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+
+  
+
+
+<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href=".." title="Delta Lake Documentation" class="md-nav__button md-logo" aria-label="Delta Lake Documentation" data-md-component="logo">
+      
+  <img src="../delta-rust-no-whitespace.svg" alt="logo">
+
+    </a>
+    Delta Lake Documentation
+  </label>
+  
+    <div class="md-nav__source">
+      <a href="https://github.com/delta-io/delta-rs" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81z"/></svg>
+  </div>
+  <div class="md-source__repository">
+    delta-io/delta-rs
+  </div>
+</a>
+    </div>
+  
+  <ul class="md-nav__list" data-md-scrollfix>
+    
+      
+      
+  
+  
+    
+  
+  
+    
+    
+      
+        
+          
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" checked>
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href=".." class="md-nav__link ">
+              
+  
+  <span class="md-ellipsis">
+    Home
+  </span>
+  
+
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_1" id="__nav_1_label" tabindex="">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_1">
+            <span class="md-nav__icon md-icon"></span>
+            Home
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+    
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+        <label class="md-nav__link md-nav__link--active" for="__toc">
+          
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
+          <span class="md-nav__icon md-icon"></span>
+        </label>
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  <span class="md-ellipsis">
+    Why Use Delta Lake
+  </span>
+  
+
+      </a>
+      
+        
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#fast-performance" class="md-nav__link">
+    Fast performance
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#developer-friendly-features" class="md-nav__link">
+    Developer friendly features
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#reliable-transactions" class="md-nav__link">
+    Reliable transactions
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#interoperability" class="md-nav__link">
+    Interoperability
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#support-for-many-languages" class="md-nav__link">
+    Support for many languages
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#support-on-multiple-clouds" class="md-nav__link">
+    Support on multiple clouds
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#conclusion" class="md-nav__link">
+    Conclusion
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+      
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+    
+    
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
+        
+          
+          <label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
+            
+  
+  <span class="md-ellipsis">
+    Usage
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_2">
+            <span class="md-nav__icon md-icon"></span>
+            Usage
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/installation/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Installation
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/overview/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Overview
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/create-delta-lake-table/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Creating a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/loading-table/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Loading a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/appending-overwriting-delta-lake-table/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Append/overwrite tables
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/constraints/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Adding a constraint
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/examining-table/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Examining a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/querying-delta-tables/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Querying a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/managing-tables/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Managing a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/writing-delta-tables/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Writing a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/deleting-rows-from-delta-lake-table/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Deleting rows from a table
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    
+    
+      
+        
+      
+        
+      
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2_12" >
+        
+          
+          <label class="md-nav__link" for="__nav_2_12" id="__nav_2_12_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Optimize
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_2_12_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_2_12">
+            <span class="md-nav__icon md-icon"></span>
+            Optimize
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/optimize/small-file-compaction-with-optimize/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Small file compaction
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../usage/optimize/delta-lake-z-order/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Z Order
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+    
+    
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
+        
+          
+          <label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
+            
+  
+  <span class="md-ellipsis">
+    API Reference
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_3">
+            <span class="md-nav__icon md-icon"></span>
+            API Reference
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/delta_writer/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Writer
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    
+    
+      
+        
+          
+        
+      
+        
+      
+        
+      
+        
+      
+        
+      
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3_2" >
+        
+          
+          
+          <div class="md-nav__link md-nav__container">
+            <a href="../api/delta_table/" class="md-nav__link ">
+              
+  
+  <span class="md-ellipsis">
+    Table
+  </span>
+  
+
+            </a>
+            
+              
+              <label class="md-nav__link " for="__nav_3_2" id="__nav_3_2_label" tabindex="0">
+                <span class="md-nav__icon md-icon"></span>
+              </label>
+            
+          </div>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_2_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_3_2">
+            <span class="md-nav__icon md-icon"></span>
+            Table
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/delta_table/metadata/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Metadata
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/delta_table/delta_table_merger/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    TableMerger
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/delta_table/delta_table_optimizer/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    TableOptimizer
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/delta_table/delta_table_alterer/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    TableAlterer
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/schema/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Schema
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/storage/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Storage
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/catalog/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Catalog
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../api/exceptions/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Exceptions
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+    
+    
+      
+        
+      
+        
+      
+        
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
+        
+          
+          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
+            
+  
+  <span class="md-ellipsis">
+    Integrations
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_4">
+            <span class="md-nav__icon md-icon"></span>
+            Integrations
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../integrations/delta-lake-arrow/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Arrow
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../integrations/delta-lake-datafusion/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    DataFusion
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../integrations/delta-lake-pandas/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    pandas
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../integrations/delta-lake-polars/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Polars
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+    
+    
+      
+        
+      
+    
+    
+    
+    
+      
+      
+    
+    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
+      
+        
+        
+        
+          
+        
+        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="">
+            
+  
+  <span class="md-ellipsis">
+    How Delta Lake works
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5">
+            <span class="md-nav__icon md-icon"></span>
+            How Delta Lake works
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../how-delta-lake-works/architecture-of-delta-table/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Architecture
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+  </ul>
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+            
+              
+              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#fast-performance" class="md-nav__link">
+    Fast performance
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#developer-friendly-features" class="md-nav__link">
+    Developer friendly features
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#reliable-transactions" class="md-nav__link">
+    Reliable transactions
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#interoperability" class="md-nav__link">
+    Interoperability
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#support-for-many-languages" class="md-nav__link">
+    Support for many languages
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#support-on-multiple-clouds" class="md-nav__link">
+    Support on multiple clouds
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#conclusion" class="md-nav__link">
+    Conclusion
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+          
+          
+            <div class="md-content" data-md-component="content">
+              <article class="md-content__inner md-typeset">
+                
+                  
+
+  
+  
+
+
+<h1 id="why-use-delta-lake">Why use Delta Lake</h1>
+<p>This page explains why Delta Lake is a better storage format for most tabular data analyses than data lake alternatives.</p>
+<p>Delta Lake provides developer-friendly features, reliable transactions, and fast performance compared with alternatives like Parquet or CSV.</p>
+<h2 id="fast-performance">Fast performance</h2>
+<p>Delta tables store data in Parquet files and persist file-level metadata in the transaction log.</p>
+<p>This offers two main performance advantages:</p>
+<ul>
+<li>File skipping based on metadata that’s quickly accessible</li>
+<li>Easy identification of all file paths for the table, compared to file listing operations that can be slow, especially on cloud object stores</li>
+</ul>
+<p>Delta Lake stores min/max values for each column of each file in the table.  Certain queries can skip entire files based on the metadata.  File skipping can be a massive performance optimization.</p>
+<p>Delta Lake also makes it easy to rearrange data in the table, so more file skipping is possible.  For example, the table can be partitioned or Z Ordered, so that similar data is colocated in the same files and data skipping is optimal for your query patterns.</p>
+<p>For data lakes, you need to run file listing operations to get the file paths before you can actually read the data.  Listing all the files in a data lake can take a long time, especially if there are a lot of files and they are stored in Hive-style partitions.</p>
+<p>Delta Lake stores all the file paths in the transaction log.  So you can quickly get the file paths directly from the log and then run your query.  Delta Lake also stores the file-level metadata in the transaction log which is quicker than opening all the files in the data lake and grabbing the metadata from the file footer.</p>
+<h2 id="developer-friendly-features">Developer friendly features</h2>
+<p>Many basic data operations are hard in data lakes but quite easy with Delta Lake.  The only data operation that’s easy in a data lake is appending data.  Delta Lake makes all data operations easy, including the following:</p>
+<ul>
+<li>Appends</li>
+<li>Upserts</li>
+<li>Deletes</li>
+<li>Replace where</li>
+</ul>
+<p>Even deleting a few rows of data from a data lake is hard.  It’s even harder if you want to run the operation in a performant manner.</p>
+<p>Delta Lake makes it easy to run common data operations and executes them performantly under the hood.</p>
+<p>Delta Lake also executes write operations as transactions, which makes data operations safer and prevents downtime.  Write operations will cause data lakes to be in an unstable state while the computation is running.  For example, if you read a data lake while a delete operation is running, then you may get the wrong data.</p>
+<p>Let’s explore the benefits of reliable transactions in more detail.</p>
+<h2 id="reliable-transactions">Reliable transactions</h2>
+<p>Delta Lake supports transactions which means that write operations have the following characteristics:</p>
+<ul>
+<li>They either finish completely or don’t run at all</li>
+<li>They are executed in a serial manner and don’t conflict with other transactions</li>
+<li>They don’t corrupt a table or violate table constraints</li>
+</ul>
+<p>Data lakes don’t support transactions, so the write operations can cause the following errors:</p>
+<ul>
+<li>There is no schema enforcement, so you can append data to a data lake with a mismatching schema</li>
+<li>Reading the data lake often yields incorrect results while write transactions are performed</li>
+<li>Data lakes can be corrupted by invalid write operations or computations that error out</li>
+<li>Concurrent transactions that conflict can cause data loss</li>
+</ul>
+<p>Production data systems should rely on storage systems like Delta Lake that support transactions.</p>
+<h2 id="interoperability">Interoperability</h2>
+<p>Delta Lake tables are interoperable and can be read/written by multiple different query engines.</p>
+<p>For example, you can create a Delta table with Spark, append to it with pandas, and then read it with Polars.</p>
+<p><img alt="Diagram of a Delta table being read and written by multiple query engines" src="../delta-interop.png" /></p>
+<p>Delta tables are powerful because they are interoperable with various query engines and computation runtimes.</p>
+<p>Suppose you have a Delta table that’s updated with an AWS Lambda function every 5 minutes.  There is only a small amount of data collected every 5 minutes, so a lightweight runtime like AWS Lambda is sufficient.</p>
+<p>Further suppose that the overall table is quite large.  So when you want to perform DML operations or query the whole table, your team uses a Spark cluster.</p>
+<p>Delta Lake is flexible enough to allow these types of operations from multiple readers and writers.  This provides teams with the flexibility to choose the right tool for the job.</p>
+<h2 id="support-for-many-languages">Support for many languages</h2>
+<p>Delta tables can be queried with a variety of different languages.  This project provides APIs for Rust and Python users and does not depend on Java or Scala.  This project is a great fit for pandas, Polars, DuckDB, or DataFusion users.</p>
+<p>Delta Lake supports many languages and even more language support is coming soon!</p>
+<h2 id="support-on-multiple-clouds">Support on multiple clouds</h2>
+<p>Delta Lake supports multiple clouds including GCP, AWS, and Azure.</p>
+<p>You can also use Delta Lake on your local machine or in an on-prem environment.</p>
+<p>Delta Lake is quite portable.</p>
+<h2 id="conclusion">Conclusion</h2>
+<p>Delta Lake is a mature table format that offers users tons of advantages over a data lake with virtually no downsides.</p>
+<p>Once you start using Delta Lake, you will never want to go back to data lakes that expose you to a variety of dangerous bugs, poor performance, and reliability issues.</p>
+<p>The Delta Lake community is also welcoming and open.  We gladly accept new contributors and help users with their questions.</p>
+
+
+
+
+
+                
+              </article>
+            </div>
+          
+          
+  <script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var tab,labels=set.querySelector(".tabbed-labels");for(tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
+
+        </div>
+        
+      </main>
+      
+        <footer class="md-footer">
+  
+    
+      
+      <nav class="md-footer__inner md-grid" aria-label="Footer" >
+        
+          
+          <a href=".." class="md-footer__link md-footer__link--prev" aria-label="Previous: Home">
+            <div class="md-footer__button md-icon">
+              
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
+            </div>
+            <div class="md-footer__title">
+              <span class="md-footer__direction">
+                Previous
+              </span>
+              <div class="md-ellipsis">
+                Home
+              </div>
+            </div>
+          </a>
+        
+        
+          
+          <a href="../usage/installation/" class="md-footer__link md-footer__link--next" aria-label="Next: Installation">
+            <div class="md-footer__title">
+              <span class="md-footer__direction">
+                Next
+              </span>
+              <div class="md-ellipsis">
+                Installation
+              </div>
+            </div>
+            <div class="md-footer__button md-icon">
+              
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11H4Z"/></svg>
+            </div>
+          </a>
+        
+      </nav>
+    
+  
+  <div class="md-footer-meta md-typeset">
+    <div class="md-footer-meta__inner md-grid">
+      <div class="md-copyright">
+  
+  
+</div>
+      
+        <div class="md-social">
+  
+    
+    
+    
+    
+    <a href="https://go.delta.io/slack" target="_blank" rel="noopener" title="Delta slack channel" class="md-social__link">
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>
+    </a>
+  
+</div>
+      
+    </div>
+  </div>
+</footer>
+      
+    </div>
+    <div class="md-dialog" data-md-component="dialog">
+      <div class="md-dialog__inner md-typeset"></div>
+    </div>
+    
+    
+    <script id="__config" type="application/json">{"base": "..", "features": ["navigation.tracking", "navigation.instant", "navigation.expand", "navigation.tabs", "navigation.indexes", "navigation.tabs.sticky", "navigation.footer", "content.tabs.link", "content.code.annotation", "content.code.copy"], "search": "../assets/javascripts/workers/search.f886a092.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
+    
+    
+      <script src="../assets/javascripts/bundle.aecac24b.min.js"></script>
+      
+    
+  </body>
+</html>
\ No newline at end of file