From 9873457df6cded72603eb434746fc3ccfca5bd90 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Wed, 21 Aug 2024 14:44:29 +0800 Subject: [PATCH 01/29] [core] fix hll class not found
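FieldHllSketchAgg in paimon-core previously called the DataSketches classes directly, and the reported symptom is a class-not-found failure at merge time, presumably because the shaded DataSketches classes are not visible to every classloader that ends up loading the aggregator. Routing the merge through HllSketchUtil keeps the DataSketches usage inside paimon-common. A minimal usage sketch of the new helper (sketchOf is the @VisibleForTesting helper in the same class; the values and the local names are only illustrative):

    byte[] a = HllSketchUtil.sketchOf(1, 2, 3);
    byte[] b = HllSketchUtil.sketchOf(3, 4, 5);
    byte[] merged = HllSketchUtil.union(a, b);
    // cardinality estimate of the merged sketch, roughly 5 here
    double estimate = HllSketch.heapify(merged).getEstimate();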
--- .../java/org/apache/paimon/utils/HllSketchUtil.java | 10 ++++++++++ .../compact/aggregate/FieldHllSketchAgg.java | 11 ++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java b/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java index 34c5464f7106..609862dafc20 100644 --- a/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java +++ b/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java @@ -21,10 +21,20 @@ import org.apache.paimon.annotation.VisibleForTesting; import org.apache.datasketches.hll.HllSketch; +import org.apache.datasketches.hll.TgtHllType; +import org.apache.datasketches.hll.Union; /** A compressed bitmap for 32-bit integer. */ public class HllSketchUtil { + public static byte[] union(byte[] sketchBytes1, byte[] sketchBytes2) { + HllSketch sketch = HllSketch.heapify(sketchBytes1); + Union union = Union.heapify(sketchBytes2); + union.update(sketch); + HllSketch result = union.getResult(TgtHllType.HLL_4); + return result.toCompactByteArray(); + } + @VisibleForTesting public static byte[] sketchOf(int... values) { HllSketch hllSketch = new HllSketch(); diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java index 93901753645a..0ccf4af6497c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java +++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java @@ -19,10 +19,7 @@ package org.apache.paimon.mergetree.compact.aggregate; import org.apache.paimon.types.VarBinaryType; - -import org.apache.datasketches.hll.HllSketch; -import org.apache.datasketches.hll.TgtHllType; -import org.apache.datasketches.hll.Union; +import org.apache.paimon.utils.HllSketchUtil; /** HllSketch aggregate a field of a row. */ public class FieldHllSketchAgg extends FieldAggregator { @@ -50,10 +47,6 @@ public Object agg(Object accumulator, Object inputField) { return accumulator == null ? inputField : accumulator; } - HllSketch heapify = HllSketch.heapify((byte[]) accumulator); - Union union = Union.heapify((byte[]) inputField); - union.update(heapify); - HllSketch result = union.getResult(TgtHllType.HLL_4); - return result.toCompactByteArray(); + return HllSketchUtil.union((byte[]) accumulator, (byte[]) inputField); } } From dedcaa4933759e2b41036cd237c5263ab56e450d Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Sun, 22 Sep 2024 20:16:35 +0800 Subject: [PATCH 02/29] [format][orc] enable the ORC options useSelected and allowSARGToFilter to make sure filter pushdown works
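Two ORC reader options work together here: allowSARGToFilter lets the reader evaluate the search argument as a row-level filter instead of only using it to skip stripes and row groups, and useSelected makes the reader publish the surviving row indexes through batch.selected / batch.selectedInUse rather than compacting the batch. Consumers must then translate logical row numbers through the selected array, which is what the column-vector changes in this patch do. An illustrative sketch against the plain ORC API (not Paimon code; reader, sarg, sargColumnNames and batch are assumed locals):

    Reader.Options options = reader.options()
            .searchArgument(sarg, sargColumnNames)
            .useSelected(true)         // surviving rows are exposed via batch.selected
            .allowSARGToFilter(true);  // evaluate the SARG as a row filter
    RecordReader rows = reader.rows(options);
    while (rows.nextBatch(batch)) {
        for (int r = 0; r < batch.size; r++) {
            int row = batch.selectedInUse ? batch.selected[r] : r;
            // read column values at physical index 'row'
        }
    }
    rows.close();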
--- .../paimon/format/orc/OrcFileFormat.java | 1 - .../paimon/format/orc/OrcReaderFactory.java | 14 ++++++--- .../orc/reader/AbstractOrcColumnVector.java | 31 ++++++++++++------- .../orc/reader/OrcArrayColumnVector.java | 7 +++-- .../orc/reader/OrcBytesColumnVector.java | 7 +++-- .../orc/reader/OrcDecimalColumnVector.java | 5 +-- .../orc/reader/OrcDoubleColumnVector.java | 6 ++-- .../OrcLegacyTimestampColumnVector.java | 5 +-- .../orc/reader/OrcLongColumnVector.java | 9 ++++-- .../format/orc/reader/OrcMapColumnVector.java | 10 +++--- .../format/orc/reader/OrcRowColumnVector.java | 8 +++-- .../orc/reader/OrcTimestampColumnVector.java | 5 +-- 12 files changed, 68 insertions(+), 40 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java index de28487b715f..fdff90563440 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java @@ -99,7 +99,6 @@ public Optional createStatsExtractor( public FormatReaderFactory createReaderFactory( RowType projectedRowType, @Nullable List<Predicate> filters) { List<OrcFilters.Predicate> orcPredicates = new ArrayList<>(); - if (filters != null) { for (Predicate pred : filters) { Optional<OrcFilters.Predicate> orcPred = diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index 5093a5010773..3d583333ad0d 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -123,7 +123,10 @@ public OrcReaderBatch createReaderBatch( for (int i = 0; i < vectors.length; i++) { String name = tableFieldNames.get(i); DataType type = tableFieldTypes.get(i); - vectors[i] = createPaimonVector(orcBatch.cols[tableFieldNames.indexOf(name)], type); + int[] selected = orcBatch.getSelected(); + vectors[i] = + createPaimonVector( + orcBatch.cols[tableFieldNames.indexOf(name)], selected, type); } return new OrcReaderBatch(filePath, orcBatch, new VectorizedColumnBatch(vectors), recycler); } @@ -265,10 +268,11 @@ private static RecordReader createRecordReader( .schema(schema) .range(offsetAndLength.getLeft(), offsetAndLength.getRight()) .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf)) - .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) - .tolerateMissingSchema( - OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); - + .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)); + if (!conjunctPredicates.isEmpty()) { + options.useSelected(true); + options.allowSARGToFilter(true); + } // configure filters if (!conjunctPredicates.isEmpty()) { SearchArgument.Builder b = SearchArgumentFactory.newBuilder(); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java index 21154c4967b7..5b612005f1c9 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java @@ -40,8 +40,15 @@ public abstract class AbstractOrcColumnVector private final ColumnVector vector; - AbstractOrcColumnVector(ColumnVector vector) { + private final int[] selected; + + AbstractOrcColumnVector(ColumnVector vector, int[] selected) { this.vector = vector; + this.selected = selected; + } + + protected int rowMapper(int r) { + return selected[r]; } @Override @@ -50,27 +57,29 @@ public boolean isNullAt(int i) { } public static org.apache.paimon.data.columnar.ColumnVector createPaimonVector( - ColumnVector vector, DataType dataType) { + ColumnVector vector, int[] selected, DataType dataType) { if (vector instanceof LongColumnVector) { if (dataType.getTypeRoot() == DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) { - return new OrcLegacyTimestampColumnVector((LongColumnVector) vector); + return new OrcLegacyTimestampColumnVector((LongColumnVector) vector, selected); } else { - return new OrcLongColumnVector((LongColumnVector) vector); + return new OrcLongColumnVector((LongColumnVector) vector, selected); } } else if (vector instanceof DoubleColumnVector) { - return new OrcDoubleColumnVector((DoubleColumnVector) vector); + return new OrcDoubleColumnVector((DoubleColumnVector) vector, selected); } else if (vector instanceof BytesColumnVector) { - return new OrcBytesColumnVector((BytesColumnVector) vector); + return new OrcBytesColumnVector((BytesColumnVector) vector, selected); } else if (vector instanceof DecimalColumnVector) { - return new OrcDecimalColumnVector((DecimalColumnVector) vector); + return new OrcDecimalColumnVector((DecimalColumnVector) vector, selected); } else if (vector instanceof TimestampColumnVector) { - return new OrcTimestampColumnVector(vector); + return new OrcTimestampColumnVector(vector, selected); } else if (vector instanceof ListColumnVector) { - return new OrcArrayColumnVector((ListColumnVector) vector, (ArrayType) dataType); + return new OrcArrayColumnVector( + (ListColumnVector) vector, selected, (ArrayType) dataType); } else if (vector instanceof StructColumnVector) { - return new OrcRowColumnVector((StructColumnVector) vector, (RowType) dataType); + return new OrcRowColumnVector( + (StructColumnVector) vector, selected, (RowType) dataType); } else if (vector instanceof MapColumnVector) { - return new OrcMapColumnVector((MapColumnVector) vector, (MapType) dataType); + return new OrcMapColumnVector((MapColumnVector) vector, selected, (MapType) dataType); } else { throw new UnsupportedOperationException( "Unsupported vector: " + vector.getClass().getName()); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java index ed16a0b51084..ade154221835 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java @@ -32,14 +32,15 @@ public class OrcArrayColumnVector extends AbstractOrcColumnVector private final ListColumnVector hiveVector; private final ColumnVector
paimonVector; - public OrcArrayColumnVector(ListColumnVector hiveVector, ArrayType type) { - super(hiveVector); + public OrcArrayColumnVector(ListColumnVector hiveVector, int[] selected, ArrayType type) { + super(hiveVector, selected); this.hiveVector = hiveVector; - this.paimonVector = createPaimonVector(hiveVector.child, type.getElementType()); + this.paimonVector = createPaimonVector(hiveVector.child, selected, type.getElementType()); } @Override public InternalArray getArray(int i) { + i = rowMapper(i); long offset = hiveVector.offsets[i]; long length = hiveVector.lengths[i]; return new ColumnarArray(paimonVector, (int) offset, (int) length); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java index d48bad886a47..a2664f3b8e45 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java @@ -26,17 +26,18 @@ public class OrcBytesColumnVector extends AbstractOrcColumnVector private final BytesColumnVector vector; - public OrcBytesColumnVector(BytesColumnVector vector) { - super(vector); + public OrcBytesColumnVector(BytesColumnVector vector, int[] selected) { + super(vector, selected); this.vector = vector; } @Override public Bytes getBytes(int i) { int rowId = vector.isRepeating ? 0 : i; + int selectedRowId = rowMapper(rowId); byte[][] data = vector.vector; int[] start = vector.start; int[] length = vector.length; - return new Bytes(data[rowId], start[rowId], length[rowId]); + return new Bytes(data[selectedRowId], start[selectedRowId], length[selectedRowId]); } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java index 9ea4d763a5d8..30963bb29fee 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java @@ -32,13 +32,14 @@ public class OrcDecimalColumnVector extends AbstractOrcColumnVector private final DecimalColumnVector vector; - public OrcDecimalColumnVector(DecimalColumnVector vector) { - super(vector); + public OrcDecimalColumnVector(DecimalColumnVector vector, int[] selected) { + super(vector, selected); this.vector = vector; } @Override public Decimal getDecimal(int i, int precision, int scale) { + i = rowMapper(i); BigDecimal data = vector.vector[vector.isRepeating ? 
0 : i].getHiveDecimal().bigDecimalValue(); return Decimal.fromBigDecimal(data, precision, scale); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java index 0c0b0cc51d38..0353e4aea9f4 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java @@ -30,18 +30,20 @@ public class OrcDoubleColumnVector extends AbstractOrcColumnVector private final DoubleColumnVector vector; - public OrcDoubleColumnVector(DoubleColumnVector vector) { - super(vector); + public OrcDoubleColumnVector(DoubleColumnVector vector, int[] selected) { + super(vector, selected); this.vector = vector; } @Override public double getDouble(int i) { + i = rowMapper(i); return vector.vector[vector.isRepeating ? 0 : i]; } @Override public float getFloat(int i) { + i = rowMapper(i); return (float) vector.vector[vector.isRepeating ? 0 : i]; } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java index 18227ecf3dd2..508895632374 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java @@ -34,13 +34,14 @@ public class OrcLegacyTimestampColumnVector extends AbstractOrcColumnVector private final LongColumnVector hiveVector; - OrcLegacyTimestampColumnVector(LongColumnVector vector) { - super(vector); + OrcLegacyTimestampColumnVector(LongColumnVector vector, int[] selected) { + super(vector, selected); this.hiveVector = vector; } @Override public Timestamp getTimestamp(int i, int precision) { + i = rowMapper(i); int index = hiveVector.isRepeating ? 0 : i; java.sql.Timestamp timestamp = toTimestamp(hiveVector.vector[index]); return Timestamp.fromSQLTimestamp(timestamp); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java index e7dfe0e6134e..96f6922029a3 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java @@ -33,33 +33,38 @@ public class OrcLongColumnVector extends AbstractOrcColumnVector private final LongColumnVector vector; - public OrcLongColumnVector(LongColumnVector vector) { - super(vector); + public OrcLongColumnVector(LongColumnVector vector, int[] selected) { + super(vector, selected); this.vector = vector; } @Override public long getLong(int i) { + i = rowMapper(i); return vector.vector[vector.isRepeating ? 0 : i]; } @Override public boolean getBoolean(int i) { + i = rowMapper(i); return vector.vector[vector.isRepeating ? 0 : i] == 1; } @Override public byte getByte(int i) { + i = rowMapper(i); return (byte) vector.vector[vector.isRepeating ? 0 : i]; } @Override public int getInt(int i) { + i = rowMapper(i); return (int) vector.vector[vector.isRepeating ? 0 : i]; } @Override public short getShort(int i) { + i = rowMapper(i); return (short) vector.vector[vector.isRepeating ? 
0 : i]; } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java index 66a1af6dccf4..e45aea60b59c 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java @@ -33,15 +33,17 @@ public class OrcMapColumnVector extends AbstractOrcColumnVector private final ColumnVector keyPaimonVector; private final ColumnVector valuePaimonVector; - public OrcMapColumnVector(MapColumnVector hiveVector, MapType type) { - super(hiveVector); + public OrcMapColumnVector(MapColumnVector hiveVector, int[] selected, MapType type) { + super(hiveVector, selected); this.hiveVector = hiveVector; - this.keyPaimonVector = createPaimonVector(hiveVector.keys, type.getKeyType()); - this.valuePaimonVector = createPaimonVector(hiveVector.values, type.getValueType()); + this.keyPaimonVector = createPaimonVector(hiveVector.keys, selected, type.getKeyType()); + this.valuePaimonVector = + createPaimonVector(hiveVector.values, selected, type.getValueType()); } @Override public InternalMap getMap(int i) { + i = rowMapper(i); long offset = hiveVector.offsets[i]; long length = hiveVector.lengths[i]; return new ColumnarMap(keyPaimonVector, valuePaimonVector, (int) offset, (int) length); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java index caa22467f9c3..2572dcb565f2 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java @@ -31,18 +31,20 @@ public class OrcRowColumnVector extends AbstractOrcColumnVector private final VectorizedColumnBatch batch; - public OrcRowColumnVector(StructColumnVector hiveVector, RowType type) { - super(hiveVector); + public OrcRowColumnVector(StructColumnVector hiveVector, int[] selected, RowType type) { + super(hiveVector, selected); int len = hiveVector.fields.length; ColumnVector[] paimonVectors = new ColumnVector[len]; for (int i = 0; i < len; i++) { - paimonVectors[i] = createPaimonVector(hiveVector.fields[i], type.getTypeAt(i)); + paimonVectors[i] = + createPaimonVector(hiveVector.fields[i], selected, type.getTypeAt(i)); } this.batch = new VectorizedColumnBatch(paimonVectors); } @Override public ColumnarRow getRow(int i) { + i = rowMapper(i); return new ColumnarRow(batch, i); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java index dd8ac08f2f57..758ef5d6e94c 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java @@ -33,13 +33,14 @@ public class OrcTimestampColumnVector extends AbstractOrcColumnVector private final TimestampColumnVector vector; - public OrcTimestampColumnVector(ColumnVector vector) { - super(vector); + public OrcTimestampColumnVector(ColumnVector vector, int[] selected) { + super(vector, selected); this.vector = (TimestampColumnVector) vector; } @Override public Timestamp getTimestamp(int i, int precision) { + i = rowMapper(i); int index = 
vector.isRepeating ? 0 : i; return DateTimeUtils.toInternal(vector.time[index], vector.nanos[index] % 1_000_000); } From 462edc6b5cc58f7821fd4153dfe9ac38ce4f6bcf Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Sun, 22 Sep 2024 20:38:18 +0800 Subject: [PATCH 03/29] [format][orc] re-add the missed tolerateMissingSchema option --- .../java/org/apache/paimon/format/orc/OrcReaderFactory.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index 3d583333ad0d..21a7498704b2 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -268,7 +268,9 @@ private static RecordReader createRecordReader( .schema(schema) .range(offsetAndLength.getLeft(), offsetAndLength.getRight()) .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf)) - .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)); + .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) + .tolerateMissingSchema( + OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); if (!conjunctPredicates.isEmpty()) { options.useSelected(true); options.allowSARGToFilter(true); } From 2226fb9898cf3f29990c404d0c428954a2b53ab6 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Sun, 22 Sep 2024 21:08:24 +0800 Subject: [PATCH 04/29] [format][orc] fix handling of the ORC selected array when no filter condition is present
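When no predicate is pushed down, the previous patch still indexed rows through orcBatch.getSelected(). With selectedInUse == false that array is typically all zeros, so every access would be redirected to physical row 0. The fix captures the mapping only when the batch really uses it; roughly, mirroring the rowMapper change below (illustrative, r is the logical row index):

    int[] selected = orcBatch.selectedInUse ? orcBatch.getSelected() : null;

    // later, per logical row r:
    int physicalRow = (selected != null) ? selected[r] : r;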
--- .../java/org/apache/paimon/format/orc/OrcReaderFactory.java | 2 +- .../paimon/format/orc/reader/AbstractOrcColumnVector.java | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index 21a7498704b2..9071fe3b2525 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -123,7 +123,7 @@ public OrcReaderBatch createReaderBatch( for (int i = 0; i < vectors.length; i++) { String name = tableFieldNames.get(i); DataType type = tableFieldTypes.get(i); - int[] selected = orcBatch.getSelected(); + int[] selected = orcBatch.selectedInUse ? orcBatch.getSelected() : null; vectors[i] = createPaimonVector( orcBatch.cols[tableFieldNames.indexOf(name)], selected, type); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java index 5b612005f1c9..587ffc1c459d 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java @@ -48,7 +48,10 @@ public abstract class AbstractOrcColumnVector } protected int rowMapper(int r) { - return selected[r]; + if (this.selected != null) { + return selected[r]; + } + return r; } @Override From ee915c893229ca7c308090004ffc31ec987e7990 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Fri, 27 Sep 2024 11:39:48 +0800 Subject: [PATCH 05/29] [orc] keep useSelected and allowSARGToFilter disabled by default, otherwise deletion vectors would not work --- .../java/org/apache/paimon/format/orc/OrcReaderFactory.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index 9071fe3b2525..2b9e88d89ccd 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -272,8 +272,10 @@ private static RecordReader createRecordReader( .tolerateMissingSchema( OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); if (!conjunctPredicates.isEmpty()) { - options.useSelected(true); - options.allowSARGToFilter(true); + // TODO: fix this properly. If these options are enabled, deletion vectors would not + // work, because the row numbers returned by getRowNumber would change. + options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf)); + options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf)); } // configure filters From 2920fc953805d99d07053b48551602f86a12fc2e Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Tue, 15 Oct 2024 11:55:49 +0800 Subject: [PATCH 06/29] [format][orc] pass the VectorizedRowBatch to OrcColumnVector so that selected rows can be seen
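The one-shot snapshot of the selected array taken in createReaderBatch can go stale: the ORC reader reuses the same VectorizedRowBatch for every nextBatch call and rewrites selected/selectedInUse each time, while the Paimon vectors are created only once per batch object. Handing the whole batch to each vector lets rowMapper read the current state on every access, and also folds the isRepeating shortcut into one place, as added in AbstractOrcColumnVector below:

    protected int rowMapper(int r) {
        if (vector.isRepeating) {
            return 0; // a repeating vector stores its single value at index 0
        }
        return orcBatch.selectedInUse ? orcBatch.getSelected()[r] : r;
    }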
--- .../paimon/format/orc/OrcReaderFactory.java | 3 +- .../orc/reader/AbstractOrcColumnVector.java | 33 ++++++++++--------- .../orc/reader/OrcArrayColumnVector.java | 8 +++-- .../orc/reader/OrcBytesColumnVector.java | 5 +-- .../orc/reader/OrcDecimalColumnVector.java | 5 +-- .../orc/reader/OrcDoubleColumnVector.java | 5 +-- .../OrcLegacyTimestampColumnVector.java | 5 +-- .../orc/reader/OrcLongColumnVector.java | 5 +-- .../format/orc/reader/OrcMapColumnVector.java | 10 +++--- .../format/orc/reader/OrcRowColumnVector.java | 8 +++-- .../orc/reader/OrcTimestampColumnVector.java | 5 +-- 11 files changed, 52 insertions(+), 40 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index 2b9e88d89ccd..6b1590f03109 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -123,10 +123,9 @@ public OrcReaderBatch createReaderBatch( for (int i = 0; i < vectors.length; i++) { String name = tableFieldNames.get(i); DataType type = tableFieldTypes.get(i); - int[] selected = orcBatch.selectedInUse ? orcBatch.getSelected() : null; vectors[i] = createPaimonVector( - orcBatch.cols[tableFieldNames.indexOf(name)], selected, type); + orcBatch.cols[tableFieldNames.indexOf(name)], orcBatch, type); } return new OrcReaderBatch(filePath, orcBatch, new VectorizedColumnBatch(vectors), recycler); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java index 587ffc1c459d..377ff37a2e68 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** This column vector is used to adapt hive's ColumnVector to Paimon's ColumnVector. */ public abstract class AbstractOrcColumnVector @@ -40,18 +41,18 @@ public abstract class AbstractOrcColumnVector private final ColumnVector vector; - private final int[] selected; + private final VectorizedRowBatch orcBatch; - AbstractOrcColumnVector(ColumnVector vector, int[] selected) { + AbstractOrcColumnVector(ColumnVector vector, VectorizedRowBatch orcBatch) { this.vector = vector; - this.selected = selected; + this.orcBatch = orcBatch; } protected int rowMapper(int r) { - if (this.selected != null) { - return selected[r]; + if (vector.isRepeating) { + return 0; } - return r; + return this.orcBatch.selectedInUse ? 
this.orcBatch.getSelected()[r] : r; } @Override @@ -60,29 +61,29 @@ public boolean isNullAt(int i) { } public static org.apache.paimon.data.columnar.ColumnVector createPaimonVector( - ColumnVector vector, int[] selected, DataType dataType) { + ColumnVector vector, VectorizedRowBatch orcBatch, DataType dataType) { if (vector instanceof LongColumnVector) { if (dataType.getTypeRoot() == DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) { - return new OrcLegacyTimestampColumnVector((LongColumnVector) vector, selected); + return new OrcLegacyTimestampColumnVector((LongColumnVector) vector, orcBatch); } else { - return new OrcLongColumnVector((LongColumnVector) vector, selected); + return new OrcLongColumnVector((LongColumnVector) vector, orcBatch); } } else if (vector instanceof DoubleColumnVector) { - return new OrcDoubleColumnVector((DoubleColumnVector) vector, selected); + return new OrcDoubleColumnVector((DoubleColumnVector) vector, orcBatch); } else if (vector instanceof BytesColumnVector) { - return new OrcBytesColumnVector((BytesColumnVector) vector, selected); + return new OrcBytesColumnVector((BytesColumnVector) vector, orcBatch); } else if (vector instanceof DecimalColumnVector) { - return new OrcDecimalColumnVector((DecimalColumnVector) vector, selected); + return new OrcDecimalColumnVector((DecimalColumnVector) vector, orcBatch); } else if (vector instanceof TimestampColumnVector) { - return new OrcTimestampColumnVector(vector, selected); + return new OrcTimestampColumnVector(vector, orcBatch); } else if (vector instanceof ListColumnVector) { return new OrcArrayColumnVector( - (ListColumnVector) vector, selected, (ArrayType) dataType); + (ListColumnVector) vector, orcBatch, (ArrayType) dataType); } else if (vector instanceof StructColumnVector) { return new OrcRowColumnVector( - (StructColumnVector) vector, selected, (RowType) dataType); + (StructColumnVector) vector, orcBatch, (RowType) dataType); } else if (vector instanceof MapColumnVector) { - return new OrcMapColumnVector((MapColumnVector) vector, selected, (MapType) dataType); + return new OrcMapColumnVector((MapColumnVector) vector, orcBatch, (MapType) dataType); } else { throw new UnsupportedOperationException( "Unsupported vector: " + vector.getClass().getName()); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java index ade154221835..25a1935f3e4b 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java @@ -24,6 +24,7 @@ import org.apache.paimon.types.ArrayType; import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** This column vector is used to adapt hive's ListColumnVector to Paimon's ArrayColumnVector. 
*/ public class OrcArrayColumnVector extends AbstractOrcColumnVector @@ -32,10 +33,11 @@ public class OrcArrayColumnVector extends AbstractOrcColumnVector private final ListColumnVector hiveVector; private final ColumnVector paimonVector; - public OrcArrayColumnVector(ListColumnVector hiveVector, int[] selected, ArrayType type) { - super(hiveVector, selected); + public OrcArrayColumnVector( + ListColumnVector hiveVector, VectorizedRowBatch orcBatch, ArrayType type) { + super(hiveVector, orcBatch); this.hiveVector = hiveVector; - this.paimonVector = createPaimonVector(hiveVector.child, selected, type.getElementType()); + this.paimonVector = createPaimonVector(hiveVector.child, orcBatch, type.getElementType()); } @Override diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java index a2664f3b8e45..92b4853aaae4 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java @@ -19,6 +19,7 @@ package org.apache.paimon.format.orc.reader; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** This column vector is used to adapt hive's BytesColumnVector to Paimon's BytesColumnVector. */ public class OrcBytesColumnVector extends AbstractOrcColumnVector @@ -26,8 +27,8 @@ public class OrcBytesColumnVector extends AbstractOrcColumnVector private final BytesColumnVector vector; - public OrcBytesColumnVector(BytesColumnVector vector, int[] selected) { - super(vector, selected); + public OrcBytesColumnVector(BytesColumnVector vector, VectorizedRowBatch orcBatch) { + super(vector, orcBatch); this.vector = vector; } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java index 30963bb29fee..c8545723caf0 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java @@ -21,6 +21,7 @@ import org.apache.paimon.data.Decimal; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import java.math.BigDecimal; @@ -32,8 +33,8 @@ public class OrcDecimalColumnVector extends AbstractOrcColumnVector private final DecimalColumnVector vector; - public OrcDecimalColumnVector(DecimalColumnVector vector, int[] selected) { - super(vector, selected); + public OrcDecimalColumnVector(DecimalColumnVector vector, VectorizedRowBatch orcBatch) { + super(vector, orcBatch); this.vector = vector; } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java index 0353e4aea9f4..3e19b137a222 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java @@ -19,6 +19,7 @@ package org.apache.paimon.format.orc.reader; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** * This column 
vector is used to adapt hive's DoubleColumnVector to Paimon's float and double @@ -30,8 +31,8 @@ public class OrcDoubleColumnVector extends AbstractOrcColumnVector private final DoubleColumnVector vector; - public OrcDoubleColumnVector(DoubleColumnVector vector, int[] selected) { - super(vector, selected); + public OrcDoubleColumnVector(DoubleColumnVector vector, VectorizedRowBatch orcBatch) { + super(vector, orcBatch); this.vector = vector; } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java index 508895632374..eb3e014f2fcf 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import java.time.LocalDateTime; @@ -34,8 +35,8 @@ public class OrcLegacyTimestampColumnVector extends AbstractOrcColumnVector private final LongColumnVector hiveVector; - OrcLegacyTimestampColumnVector(LongColumnVector vector, int[] selected) { - super(vector, selected); + OrcLegacyTimestampColumnVector(LongColumnVector vector, VectorizedRowBatch orcBatch) { + super(vector, orcBatch); this.hiveVector = vector; } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java index 96f6922029a3..76c1c198d21d 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java @@ -19,6 +19,7 @@ package org.apache.paimon.format.orc.reader; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** * This column vector is used to adapt hive's LongColumnVector to Paimon's boolean, byte, short, int @@ -33,8 +34,8 @@ public class OrcLongColumnVector extends AbstractOrcColumnVector private final LongColumnVector vector; - public OrcLongColumnVector(LongColumnVector vector, int[] selected) { - super(vector, selected); + public OrcLongColumnVector(LongColumnVector vector, VectorizedRowBatch orcBatch) { + super(vector, orcBatch); this.vector = vector; } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java index e45aea60b59c..c7245275fdd2 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcMapColumnVector.java @@ -24,6 +24,7 @@ import org.apache.paimon.types.MapType; import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** This column vector is used to adapt hive's MapColumnVector to Paimon's MapColumnVector. 
*/ public class OrcMapColumnVector extends AbstractOrcColumnVector @@ -33,12 +34,13 @@ public class OrcMapColumnVector extends AbstractOrcColumnVector private final ColumnVector keyPaimonVector; private final ColumnVector valuePaimonVector; - public OrcMapColumnVector(MapColumnVector hiveVector, int[] selected, MapType type) { - super(hiveVector, selected); + public OrcMapColumnVector( + MapColumnVector hiveVector, VectorizedRowBatch orcBatch, MapType type) { + super(hiveVector, orcBatch); this.hiveVector = hiveVector; - this.keyPaimonVector = createPaimonVector(hiveVector.keys, selected, type.getKeyType()); + this.keyPaimonVector = createPaimonVector(hiveVector.keys, orcBatch, type.getKeyType()); this.valuePaimonVector = - createPaimonVector(hiveVector.values, selected, type.getValueType()); + createPaimonVector(hiveVector.values, orcBatch, type.getValueType()); } @Override diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java index 2572dcb565f2..6c73c9fdbe0d 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java @@ -24,6 +24,7 @@ import org.apache.paimon.types.RowType; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** This column vector is used to adapt hive's StructColumnVector to Flink's RowColumnVector. */ public class OrcRowColumnVector extends AbstractOrcColumnVector @@ -31,13 +32,14 @@ public class OrcRowColumnVector extends AbstractOrcColumnVector private final VectorizedColumnBatch batch; - public OrcRowColumnVector(StructColumnVector hiveVector, int[] selected, RowType type) { - super(hiveVector, selected); + public OrcRowColumnVector( + StructColumnVector hiveVector, VectorizedRowBatch orcBatch, RowType type) { + super(hiveVector, orcBatch); int len = hiveVector.fields.length; ColumnVector[] paimonVectors = new ColumnVector[len]; for (int i = 0; i < len; i++) { paimonVectors[i] = - createPaimonVector(hiveVector.fields[i], selected, type.getTypeAt(i)); + createPaimonVector(hiveVector.fields[i], orcBatch, type.getTypeAt(i)); } this.batch = new VectorizedColumnBatch(paimonVectors); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java index 758ef5d6e94c..8840a02a8c83 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; /** * This column vector is used to adapt hive's TimestampColumnVector to Paimon's @@ -33,8 +34,8 @@ public class OrcTimestampColumnVector extends AbstractOrcColumnVector private final TimestampColumnVector vector; - public OrcTimestampColumnVector(ColumnVector vector, int[] selected) { - super(vector, selected); + public OrcTimestampColumnVector(ColumnVector vector, VectorizedRowBatch orcBatch) { + super(vector, orcBatch); this.vector = (TimestampColumnVector) vector; } From 
8a89649cecb2e9c886e97cf7ef4a9baf4f1a0a7a Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Tue, 15 Oct 2024 16:09:59 +0800 Subject: [PATCH 07/29] [format][orc] remove the per-accessor isRepeating checks
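After patch 06, rowMapper already returns 0 for repeating vectors, so the remaining `vector.isRepeating ? 0 : i` branches in the getters are redundant; in OrcBytesColumnVector the old code even applied the repeating shortcut before the selected mapping, which (before patch 06 reordered the checks) could read selected[0] instead of index 0. Centralizing the logic leaves each accessor in this shape (taken from the pattern of this diff; getLong shown as the representative case):

    @Override
    public long getLong(int i) {
        i = rowMapper(i); // handles both isRepeating and selectedInUse
        return vector.vector[i];
    }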
--- .../format/orc/reader/AbstractOrcColumnVector.java | 2 +- .../paimon/format/orc/reader/OrcBytesColumnVector.java | 5 ++--- .../format/orc/reader/OrcDecimalColumnVector.java | 3 +-- .../format/orc/reader/OrcDoubleColumnVector.java | 4 ++-- .../orc/reader/OrcLegacyTimestampColumnVector.java | 3 +-- .../paimon/format/orc/reader/OrcLongColumnVector.java | 10 +++++----- .../format/orc/reader/OrcTimestampColumnVector.java | 3 +-- 7 files changed, 13 insertions(+), 17 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java index 377ff37a2e68..0557a72230cc 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/AbstractOrcColumnVector.java @@ -57,7 +57,7 @@ protected int rowMapper(int r) { @Override public boolean isNullAt(int i) { - return !vector.noNulls && vector.isNull[vector.isRepeating ? 0 : i]; + return !vector.noNulls && vector.isNull[rowMapper(i)]; } public static org.apache.paimon.data.columnar.ColumnVector createPaimonVector( diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java index 92b4853aaae4..7f812bb5628b 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcBytesColumnVector.java @@ -34,11 +34,10 @@ public OrcBytesColumnVector(BytesColumnVector vector, VectorizedRowBatch orcBatc @Override public Bytes getBytes(int i) { - int rowId = vector.isRepeating ? 0 : i; - int selectedRowId = rowMapper(rowId); + int rowId = rowMapper(i); byte[][] data = vector.vector; int[] start = vector.start; int[] length = vector.length; - return new Bytes(data[selectedRowId], start[selectedRowId], length[selectedRowId]); + return new Bytes(data[rowId], start[rowId], length[rowId]); } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java index c8545723caf0..382c19f45be1 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDecimalColumnVector.java @@ -41,8 +41,7 @@ public OrcDecimalColumnVector(DecimalColumnVector vector, VectorizedRowBatch orc @Override public Decimal getDecimal(int i, int precision, int scale) { i = rowMapper(i); - BigDecimal data = - vector.vector[vector.isRepeating ? 0 : i].getHiveDecimal().bigDecimalValue(); + BigDecimal data = vector.vector[i].getHiveDecimal().bigDecimalValue(); return Decimal.fromBigDecimal(data, precision, scale); } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java index 3e19b137a222..f26dac6de9da 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcDoubleColumnVector.java @@ -39,12 +39,12 @@ public OrcDoubleColumnVector(DoubleColumnVector vector, VectorizedRowBatch orcBa @Override public double getDouble(int i) { i = rowMapper(i); - return vector.vector[vector.isRepeating ? 0 : i]; + return vector.vector[i]; } @Override public float getFloat(int i) { i = rowMapper(i); - return (float) vector.vector[vector.isRepeating ? 0 : i]; + return (float) vector.vector[i]; } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java index eb3e014f2fcf..5107e722edb4 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLegacyTimestampColumnVector.java @@ -43,8 +43,7 @@ public class OrcLegacyTimestampColumnVector extends AbstractOrcColumnVector @Override public Timestamp getTimestamp(int i, int precision) { i = rowMapper(i); - int index = hiveVector.isRepeating ? 0 : i; - java.sql.Timestamp timestamp = toTimestamp(hiveVector.vector[index]); + java.sql.Timestamp timestamp = toTimestamp(hiveVector.vector[i]); return Timestamp.fromSQLTimestamp(timestamp); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java index 76c1c198d21d..c289b74c58b2 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcLongColumnVector.java @@ -42,30 +42,30 @@ public OrcLongColumnVector(LongColumnVector vector, VectorizedRowBatch orcBatch) @Override public long getLong(int i) { i = rowMapper(i); - return vector.vector[vector.isRepeating ? 0 : i]; + return vector.vector[i]; } @Override public boolean getBoolean(int i) { i = rowMapper(i); - return vector.vector[vector.isRepeating ? 0 : i] == 1; + return vector.vector[i] == 1; } @Override public byte getByte(int i) { i = rowMapper(i); - return (byte) vector.vector[vector.isRepeating ? 0 : i]; + return (byte) vector.vector[i]; } @Override public int getInt(int i) { i = rowMapper(i); - return (int) vector.vector[vector.isRepeating ? 0 : i]; + return (int) vector.vector[i]; } @Override public short getShort(int i) { i = rowMapper(i); - return (short) vector.vector[vector.isRepeating ?
0 : i]; + return (short) vector.vector[i]; } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java index 8840a02a8c83..a6e71d6016f2 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java @@ -42,7 +42,6 @@ public OrcTimestampColumnVector(ColumnVector vector, VectorizedRowBatch orcBatch @Override public Timestamp getTimestamp(int i, int precision) { i = rowMapper(i); - int index = vector.isRepeating ? 0 : i; - return DateTimeUtils.toInternal(vector.time[index], vector.nanos[index] % 1_000_000); + return DateTimeUtils.toInternal(vector.time[i], vector.nanos[i] % 1_000_000); } } From c745e557b909ec28c3d6d15fc67b4ffc1fd2c9e1 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 11 Nov 2024 10:01:10 +0000 Subject: [PATCH 08/29] [core][format] merge with aa16c2bf1
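This merge batches several read-path optimizations: FileFormatFactory lookups are cached instead of hitting ServiceLoader on every call, manifest reader/writer factories are cached per (format, entry type, filters) key, ManifestEntrySerializer becomes a singleton, HadoopFileIO stops keeping its own FileSystem map, and scans gain withBuckets so bucket membership is pushed down into the manifest read as an `in` predicate on the _BUCKET field. A toy illustration of the caching helper introduced below (lookupViaServiceLoader is a hypothetical stand-in for the expensive computation):

    ObjectCacheManager<String, FileFormatFactory> cache =
            ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000);
    FileFormatFactory f1 = cache.get("orc", id -> lookupViaServiceLoader(id)); // computed once
    FileFormatFactory f2 = cache.get("orc", id -> lookupViaServiceLoader(id)); // served from cache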
null) { + return Optional.of(fileFormatFactory.create(context)); + } + ServiceLoader serviceLoader = ServiceLoader.load(FileFormatFactory.class, classLoader); for (FileFormatFactory factory : serviceLoader) { + formatFactoryCache.put(factory.identifier(), factory); if (factory.identifier().equals(formatIdentifier.toLowerCase())) { return Optional.of(factory.create(context)); } diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/ObjectCacheManager.java b/paimon-common/src/main/java/org/apache/paimon/fs/ObjectCacheManager.java new file mode 100644 index 000000000000..f157578107eb --- /dev/null +++ b/paimon-common/src/main/java/org/apache/paimon/fs/ObjectCacheManager.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.fs; + +import org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Cache; +import org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Caffeine; + +import java.time.Duration; +import java.util.function.Function; + +/** + * Sample Object Cache Manager . + * + * @param + * @param + */ +public class ObjectCacheManager { + private final Cache cache; + + private ObjectCacheManager(Duration timeout, int maxSize) { + this.cache = Caffeine.newBuilder().maximumSize(maxSize).expireAfterWrite(timeout).build(); + } + + public static ObjectCacheManager newObjectCacheManager( + Duration timeout, int maxSize) { + return new ObjectCacheManager<>(timeout, maxSize); + } + + public ObjectCacheManager put(K k, V v) { + this.cache.put(k, v); + return this; + } + + public V get(K k, Function creator) { + return this.cache.get(k, creator); + } + + public V getIfPresent(K k) { + return this.cache.getIfPresent(k); + } +} diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java index 70325ee69635..6b8104f1b6da 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java +++ b/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java @@ -27,7 +27,6 @@ import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.hadoop.SerializableConfiguration; import org.apache.paimon.utils.FunctionWithException; -import org.apache.paimon.utils.Pair; import org.apache.paimon.utils.ReflectionUtils; import org.apache.hadoop.fs.FSDataInputStream; @@ -39,10 +38,7 @@ import java.io.OutputStreamWriter; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.net.URI; import java.nio.charset.StandardCharsets; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; /** Hadoop {@link FileIO}. 
*/ @@ -52,8 +48,6 @@ public class HadoopFileIO implements FileIO { protected SerializableConfiguration hadoopConf; - protected transient volatile Map<Pair<String, String>, FileSystem> fsMap; - @VisibleForTesting public void setFileSystem(Path path, FileSystem fs) throws IOException { org.apache.hadoop.fs.Path hadoopPath = path(path); @@ -149,26 +143,7 @@ private FileSystem getFileSystem( org.apache.hadoop.fs.Path path, FunctionWithException<org.apache.hadoop.fs.Path, FileSystem, IOException> creator) throws IOException { - if (fsMap == null) { - synchronized (this) { - if (fsMap == null) { - fsMap = new ConcurrentHashMap<>(); - } - } - } - - Map<Pair<String, String>, FileSystem> map = fsMap; - - URI uri = path.toUri(); - String scheme = uri.getScheme(); - String authority = uri.getAuthority(); - Pair<String, String> key = Pair.of(scheme, authority); - FileSystem fs = map.get(key); - if (fs == null) { - fs = creator.apply(path); - map.put(key, fs); - } - return fs; + return creator.apply(path); } protected FileSystem createFileSystem(org.apache.hadoop.fs.Path path) throws IOException { diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestEntrySerializer.java b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestEntrySerializer.java index 2c3ba2aeaab3..9bee11b64bbe 100644 --- a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestEntrySerializer.java +++ b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestEntrySerializer.java @@ -37,11 +37,18 @@ public class ManifestEntrySerializer extends VersionedObjectSerializer<ManifestEntry> { + private static final ManifestEntrySerializer INSTANCE = new ManifestEntrySerializer(); + + public static ManifestEntrySerializer getInstance() { + return INSTANCE; + } + diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java + private static class FormatReaderFactoryKey { + private final RowType entryType; + private final List<Predicate> filters; + private final String formatIdentifier; + + public FormatReaderFactoryKey( + String formatIdentifier, RowType entryType, List<Predicate> filters) { + this.entryType = entryType; + this.filters = filters; + this.formatIdentifier = formatIdentifier; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) { + return false; + } + FormatReaderFactoryKey that = (FormatReaderFactoryKey) o; + return Objects.equals(entryType, that.entryType) + && Objects.equals(filters, that.filters) + && Objects.equals(formatIdentifier, that.formatIdentifier); + } + + @Override + public int hashCode() { + return Objects.hash(entryType, filters, formatIdentifier); + } + } + + private static final ObjectCacheManager<FormatReaderFactoryKey, FormatReaderFactory> + readers = ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000); + private static final ObjectCacheManager<FormatReaderFactoryKey, FormatWriterFactory> + writers = ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000); + + public ManifestFile create(List<Predicate> filters) { + String formatIdentifier = this.fileFormat.getFormatIdentifier(); RowType entryType = VersionedObjectSerializer.versionType(ManifestEntry.SCHEMA); + FormatReaderFactoryKey formatReaderFactoryKey = + new FormatReaderFactoryKey(formatIdentifier, entryType, filters); return new ManifestFile( fileIO, schemaManager, partitionType, - new ManifestEntrySerializer(), + ManifestEntrySerializer.getInstance(), entryType, - fileFormat.createReaderFactory(entryType), - fileFormat.createWriterFactory(entryType), + readers.get( + formatReaderFactoryKey, + (ignore) -> fileFormat.createReaderFactory(entryType, filters)), + writers.get( + formatReaderFactoryKey, + (ignore) -> fileFormat.createWriterFactory(entryType)), compression, pathFactory.manifestFileFactory(), suggestedFileSize, diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java index d043932810c3..394eb0b3ffa4 100644 ---
a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java @@ -35,9 +35,12 @@ import org.apache.paimon.operation.metrics.ScanStats; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.predicate.Predicate; +import org.apache.paimon.predicate.PredicateBuilder; import org.apache.paimon.schema.SchemaManager; import org.apache.paimon.schema.TableSchema; import org.apache.paimon.table.source.ScanMode; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FileStorePathFactory; import org.apache.paimon.utils.Filter; @@ -82,6 +85,7 @@ public abstract class AbstractFileStoreScan implements FileStoreScan { private Snapshot specifiedSnapshot = null; private Filter bucketFilter = null; + private List buckets; private List specifiedManifests = null; protected ScanMode scanMode = ScanMode.ALL; private Filter levelFilter = null; @@ -136,6 +140,14 @@ public FileStoreScan withPartitionFilter(PartitionPredicate predicate) { @Override public FileStoreScan withBucket(int bucket) { this.bucketFilter = i -> i == bucket; + this.buckets = Collections.singletonList(bucket); + return this; + } + + @Override + public FileStoreScan withBuckets(List buckets) { + this.bucketFilter = buckets::contains; + this.buckets = buckets; return this; } @@ -416,7 +428,7 @@ private boolean filterMergedManifestEntry(ManifestEntry entry) { public List readManifest(ManifestFileMeta manifest) { List entries = manifestFileFactory - .create() + .create(createPushDownFilter(buckets, numOfBuckets)) .read( manifest.fileName(), manifest.fileSize(), @@ -480,6 +492,23 @@ private static Filter createCacheRowFilter( }; } + /** + * Read the corresponding entries based on the current required partition and bucket. + * + *

Implemented to {@link InternalRow} is for performance (No deserialization). + */ + private static List createPushDownFilter(List buckets, int numOfBuckets) { + if (buckets == null || buckets.isEmpty()) { + return null; + } + List predicates = new ArrayList<>(); + PredicateBuilder predicateBuilder = + new PredicateBuilder( + RowType.of(new DataType[] {new IntType()}, new String[] {"_BUCKET"})); + predicates.add(predicateBuilder.in(0, new ArrayList<>(buckets))); + return predicates; + } + /** * Read the corresponding entries based on the current required partition and bucket. * diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java index 197b4dff4962..4f3cd6e86463 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java @@ -55,6 +55,8 @@ public interface FileStoreScan { FileStoreScan withBucket(int bucket); + FileStoreScan withBuckets(List buckets); + FileStoreScan withBucketFilter(Filter bucketFilter); FileStoreScan withPartitionBucket(BinaryRow partition, int bucket); diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java index ba1bc6588f68..1e292de12a40 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java @@ -80,6 +80,12 @@ public AbstractDataTableScan withBucketFilter(Filter bucketFilter) { return this; } + @Override + public AbstractDataTableScan withBuckets(List buckets) { + snapshotReader.withBuckets(buckets); + return this; + } + @Override public AbstractDataTableScan withPartitionFilter(Map partitionSpec) { snapshotReader.withPartitionFilter(partitionSpec); diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java index 00a4fc0cde18..5dfd83640f5d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java @@ -23,6 +23,7 @@ import org.apache.paimon.predicate.Predicate; import org.apache.paimon.utils.Filter; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -47,6 +48,14 @@ default InnerTableScan withBucketFilter(Filter bucketFilter) { return this; } + default InnerTableScan withBucket(Integer bucket) { + return withBuckets(Collections.singletonList(bucket)); + } + + default InnerTableScan withBuckets(List buckets) { + throw new RuntimeException("not impl withBuckets for " + this.getClass().getName()); + } + default InnerTableScan withLevelFilter(Filter levelFilter) { return this; } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java index bd07f49fda01..ec53fdfb1b88 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java @@ -80,6 +80,8 @@ public interface SnapshotReader { SnapshotReader withBucket(int bucket); + SnapshotReader withBuckets(List buckets); + SnapshotReader withBucketFilter(Filter 
bucketFilter); SnapshotReader withDataFileNameFilter(Filter fileNameFilter); diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java index b2fd1c7850f5..2cf4164f7593 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java @@ -245,6 +245,11 @@ public SnapshotReader withBucket(int bucket) { return this; } + public SnapshotReader withBuckets(List buckets) { + scan.withBuckets(buckets); + return this; + } + @Override public SnapshotReader withBucketFilter(Filter bucketFilter) { scan.withBucketFilter(bucketFilter); @@ -271,7 +276,13 @@ public SnapshotReader withShard(int indexOfThisSubtask, int numberOfParallelSubt Math.abs(file.hashCode() % numberOfParallelSubtasks) == indexOfThisSubtask); } else { - withBucketFilter(bucket -> bucket % numberOfParallelSubtasks == indexOfThisSubtask); + List buckets = new ArrayList<>(); + for (int bucket = 0; bucket < numberOfParallelSubtasks; bucket++) { + if (bucket % numberOfParallelSubtasks == indexOfThisSubtask) { + buckets.add(bucket); + } + } + withBuckets(buckets); } return this; } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java index d9cf80289953..92556dd51704 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java @@ -334,6 +334,12 @@ public SnapshotReader withBucket(int bucket) { return this; } + @Override + public SnapshotReader withBuckets(List buckets) { + snapshotReader.withBuckets(buckets); + return this; + } + @Override public SnapshotReader withBucketFilter(Filter bucketFilter) { snapshotReader.withBucketFilter(bucketFilter); diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/ObjectsCache.java b/paimon-core/src/main/java/org/apache/paimon/utils/ObjectsCache.java index 8c490e008baa..ff1c76cb0a03 100644 --- a/paimon-core/src/main/java/org/apache/paimon/utils/ObjectsCache.java +++ b/paimon-core/src/main/java/org/apache/paimon/utils/ObjectsCache.java @@ -28,6 +28,9 @@ import org.apache.paimon.memory.MemorySegmentSource; import org.apache.paimon.types.RowType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -41,6 +44,7 @@ /** Cache records to {@link SegmentsCache} by compacted serializer. 
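The withBuckets hook introduced here flows down to the manifest read path: the requested bucket set becomes an IN predicate on the manifest's _BUCKET column, which the format reader can push down. A condensed sketch of that predicate construction, mirroring createPushDownFilter from the hunk above (the bucket values are illustrative):

    // Build `_BUCKET IN (0, 2, 5)` against a single-column row type.
    PredicateBuilder builder =
            new PredicateBuilder(
                    RowType.of(new DataType[] {new IntType()}, new String[] {"_BUCKET"}));
    Predicate bucketIn = builder.in(0, new ArrayList<>(Arrays.asList(0, 2, 5)));

The resulting predicate list is what manifestFileFactory.create(filters) receives, so manifest entries for other buckets can be skipped before deserialization.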
*/ @ThreadSafe public class ObjectsCache { + protected static final Logger LOG = LoggerFactory.getLogger(ObjectsCache.class); private final SegmentsCache cache; private final ObjectSerializer projectedSerializer; @@ -72,6 +76,9 @@ public List read( if (segments != null) { return readFromSegments(segments, readFilter); } else { + if (LOG.isDebugEnabled()) { + LOG.debug("not match cache key {}", key); + } if (fileSize == null) { fileSize = fileSizeFunction.apply(key); } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java index c2db03aa16ed..e2e2c057e1a6 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java @@ -31,6 +31,7 @@ import org.apache.paimon.format.orc.reader.OrcSplitReaderUtil; import org.apache.paimon.format.orc.writer.RowDataVectorizer; import org.apache.paimon.format.orc.writer.Vectorizer; +import org.apache.paimon.fs.ObjectCacheManager; import org.apache.paimon.options.MemorySize; import org.apache.paimon.options.Options; import org.apache.paimon.predicate.Predicate; @@ -44,12 +45,14 @@ import org.apache.paimon.types.MultisetType; import org.apache.paimon.types.RowType; +import org.apache.hadoop.conf.Configuration; import org.apache.orc.OrcConf; import org.apache.orc.TypeDescription; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -70,13 +73,29 @@ public class OrcFileFormat extends FileFormat { private final int readBatchSize; private final int writeBatchSize; + private static final org.apache.hadoop.conf.Configuration emptyConf = + new org.apache.hadoop.conf.Configuration(); + private static final ObjectCacheManager configCache = + ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000); + + static { + emptyConf.set("paimon.empty.configuration", "paimon.empty.configuration"); + } + public OrcFileFormat(FormatContext formatContext) { super(IDENTIFIER); this.orcProperties = getOrcProperties(formatContext.formatOptions(), formatContext); - this.readerConf = new org.apache.hadoop.conf.Configuration(); - this.orcProperties.forEach((k, v) -> readerConf.set(k.toString(), v.toString())); - this.writerConf = new org.apache.hadoop.conf.Configuration(); - this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(), v.toString())); + Configuration conf; + Configuration cachedConf = configCache.getIfPresent(orcProperties); + if (cachedConf != null) { + conf = cachedConf; + } else { + conf = new org.apache.hadoop.conf.Configuration(emptyConf); + this.orcProperties.forEach((k, v) -> conf.set(k.toString(), v.toString())); + configCache.put(orcProperties, conf); + } + this.readerConf = conf; + this.writerConf = conf; this.readBatchSize = formatContext.readBatchSize(); this.writeBatchSize = formatContext.writeBatchSize(); } @@ -146,7 +165,6 @@ public FormatWriterFactory createWriterFactory(RowType type) { private static Properties getOrcProperties(Options options, FormatContext formatContext) { Properties orcProperties = new Properties(); - Properties properties = new Properties(); options.addAllToProperties(properties); properties.forEach((k, v) -> orcProperties.put(IDENTIFIER + "." 
+ k, v)); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java index 6c73c9fdbe0d..b925f5b536b1 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcRowColumnVector.java @@ -46,7 +46,7 @@ public OrcRowColumnVector( @Override public ColumnarRow getRow(int i) { - i = rowMapper(i); + // no need to call rowMapper here . return new ColumnarRow(batch, i); } From bbdd316ed2450fe77672b4f1d470be65d5663df9 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Mon, 11 Nov 2024 19:11:01 +0800 Subject: [PATCH 09/29] [core] fix AuditLogTable merge error --- .../main/java/org/apache/paimon/table/system/AuditLogTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java index c7fb56c5b4a3..a11e88074932 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java @@ -332,7 +332,7 @@ public SnapshotReader withBucket(int bucket) { @Override public SnapshotReader withBuckets(List buckets) { - snapshotReader.withBuckets(buckets); + wrapped.withBuckets(buckets); return this; } From 10ef09c343e149b9a13dc43ff47239de9222026c Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Tue, 12 Nov 2024 04:01:45 +0000 Subject: [PATCH 10/29] [format] recover HadoopFileIO --- .../apache/paimon/fs/hadoop/HadoopFileIO.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java index 6b8104f1b6da..c948536660f5 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java +++ b/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java @@ -27,6 +27,7 @@ import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.hadoop.SerializableConfiguration; import org.apache.paimon.utils.FunctionWithException; +import org.apache.paimon.utils.Pair; import org.apache.paimon.utils.ReflectionUtils; import org.apache.hadoop.fs.FSDataInputStream; @@ -39,6 +40,9 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; +import java.net.URI; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; /** Hadoop {@link FileIO}. 
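Patch 10 restores the FileSystem cache that an earlier patch in this series removed: a lazily initialized ConcurrentHashMap keyed by URI scheme and authority. Condensed from the hunk below (path, creator, and fsMap as in the patch; exception handling elided):

    // Reuse one FileSystem per (scheme, authority); 'creator' opens a new one.
    URI uri = path.toUri();
    Pair<String, String> key = Pair.of(uri.getScheme(), uri.getAuthority());
    FileSystem fs = fsMap.get(key);
    if (fs == null) {
        // Benign race: two threads may both open one; the later put wins.
        fs = creator.apply(path);
        fsMap.put(key, fs);
    }
    return fs;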
*/ @@ -48,6 +52,8 @@ public class HadoopFileIO implements FileIO { protected SerializableConfiguration hadoopConf; + protected transient volatile Map, FileSystem> fsMap; + @VisibleForTesting public void setFileSystem(Path path, FileSystem fs) throws IOException { org.apache.hadoop.fs.Path hadoopPath = path(path); @@ -143,7 +149,26 @@ private FileSystem getFileSystem( org.apache.hadoop.fs.Path path, FunctionWithException creator) throws IOException { - return creator.apply(path); + if (fsMap == null) { + synchronized (this) { + if (fsMap == null) { + fsMap = new ConcurrentHashMap<>(); + } + } + } + + Map, FileSystem> map = fsMap; + + URI uri = path.toUri(); + String scheme = uri.getScheme(); + String authority = uri.getAuthority(); + Pair key = Pair.of(scheme, authority); + FileSystem fs = map.get(key); + if (fs == null) { + fs = creator.apply(path); + map.put(key, fs); + } + return fs; } protected FileSystem createFileSystem(org.apache.hadoop.fs.Path path) throws IOException { From a2acbab853f6e1d187a2ab35684476a395ef718f Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Tue, 12 Nov 2024 12:04:54 +0800 Subject: [PATCH 11/29] [format] checkstyle --- .../src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java index c948536660f5..70325ee69635 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java +++ b/paimon-common/src/main/java/org/apache/paimon/fs/hadoop/HadoopFileIO.java @@ -39,8 +39,8 @@ import java.io.OutputStreamWriter; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.nio.charset.StandardCharsets; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; From e1c90c70db49a780ac32f10ce033f59cc9e3cf62 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Tue, 12 Nov 2024 12:17:08 +0800 Subject: [PATCH 12/29] [format][orc] add pushdown option only for reader . --- .../src/main/java/org/apache/orc/OrcConf.java | 15 +++++++++++++++ .../paimon/format/orc/OrcReaderFactory.java | 5 +++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/orc/OrcConf.java b/paimon-format/src/main/java/org/apache/orc/OrcConf.java index a7fa1a21bc8c..ee07e45117a4 100644 --- a/paimon-format/src/main/java/org/apache/orc/OrcConf.java +++ b/paimon-format/src/main/java/org/apache/orc/OrcConf.java @@ -305,6 +305,21 @@ public enum OrcConf { + "must have the filter\n" + "reapplied to avoid using unset values in the unselected rows.\n" + "If unsure please leave this as false."), + + READER_ONLY_ALLOW_SARG_TO_FILTER( + "orc.reader.sarg.to.filter", + "orc.reader.sarg.to.filter", + false, + "A boolean flag to determine if a SArg is allowed to become a filter, only for reader."), + READER_ONLY_USE_SELECTED( + "orc.reader.filter.use.selected", + "orc.reader.filter.use.selected", + false, + "A boolean flag to determine if the selected vector is supported by\n" + + "the reading application, only for reader. 
If false, the output of the ORC reader " + + "must have the filter\n" + + "reapplied to avoid using unset values in the unselected rows.\n" + + "If unsure please leave this as false."), ALLOW_PLUGIN_FILTER( "orc.filter.plugin", "orc.filter.plugin", diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index 4b4c967f4de2..5543fd791075 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -278,8 +278,9 @@ private static RecordReader createRecordReader( if (!conjunctPredicates.isEmpty()) { // TODO fix it , if open this option,future deletion vectors would not work, // cased by getRowNumber would be changed . - options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf)); - options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf)); + options.useSelected(OrcConf.READER_ONLY_USE_SELECTED.getBoolean(conf)); + options.allowSARGToFilter( + OrcConf.READER_ONLY_ALLOW_SARG_TO_FILTER.getBoolean(conf)); } // configure filters if (!conjunctPredicates.isEmpty()) { From 8c9a75c3dbb0ecd1e759a082f78ed34100159312 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Tue, 12 Nov 2024 10:09:00 +0000 Subject: [PATCH 13/29] [core] recover bucket --- .../paimon/table/source/snapshot/SnapshotReaderImpl.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java index 14cf6dfae41a..74f257ddcf9d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java @@ -277,13 +277,7 @@ public SnapshotReader withShard(int indexOfThisSubtask, int numberOfParallelSubt Math.abs(file.hashCode() % numberOfParallelSubtasks) == indexOfThisSubtask); } else { - List buckets = new ArrayList<>(); - for (int bucket = 0; bucket < numberOfParallelSubtasks; bucket++) { - if (bucket % numberOfParallelSubtasks == indexOfThisSubtask) { - buckets.add(bucket); - } - } - withBuckets(buckets); + withBucketFilter(bucket -> bucket % numberOfParallelSubtasks == indexOfThisSubtask); } return this; } From f71d658ab09dfde580396220a85ef0d22bc039b7 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Tue, 12 Nov 2024 10:44:58 +0000 Subject: [PATCH 14/29] [core][format] add test for withBuckets and orcFormat --- .../operation/KeyValueFileStoreScanTest.java | 31 +++++++-- .../paimon/format/orc/OrcFileFormatTest.java | 64 +++++++++++++++++++ 2 files changed, 88 insertions(+), 7 deletions(-) diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java index ce17450538b1..8d6ab5aad37c 100644 --- a/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java @@ -40,13 +40,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import 
java.util.*; import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; @@ -225,6 +219,29 @@ public void testWithBucket() throws Exception { runTestExactMatch(scan, snapshot.id(), expected); } + @Test + public void testWithBuckets() throws Exception { + ThreadLocalRandom random = ThreadLocalRandom.current(); + List data = generateData(random.nextInt(1000) + 1); + Snapshot snapshot = writeData(data); + + int wantedBucket1 = random.nextInt(NUM_BUCKETS); + int wantedBucket2 = random.nextInt(NUM_BUCKETS); + int wantedBucket3 = random.nextInt(NUM_BUCKETS); + List buckets = Arrays.asList(wantedBucket1, wantedBucket2, wantedBucket3); + + FileStoreScan scan = store.newScan(); + scan.withSnapshot(snapshot.id()); + scan.withBuckets(buckets); + + Map expected = + store.toKvMap( + data.stream() + .filter(kv -> buckets.contains(getBucket(kv))) + .collect(Collectors.toList())); + runTestExactMatch(scan, snapshot.id(), expected); + } + @Test public void testWithSnapshot() throws Exception { ThreadLocalRandom random = ThreadLocalRandom.current(); diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java index 46bf6afe6613..216c083eec66 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java @@ -83,4 +83,68 @@ public void testSupportedDataTypes() { dataFields.add(new DataField(index++, "decimal_type", DataTypes.DECIMAL(10, 3))); orc.validateDataFields(new RowType(dataFields)); } + + @Test + public void testCreateCost() { + double createConfCost = createConfigCost(); + for (int i = 0; i < 1000; i++) { + create(); + } + int fix_times = 10_000; + long start = System.nanoTime(); + for (int i = 0; i < fix_times; i++) { + create(); + } + double cost = ((double) (System.nanoTime() - start)) / 1000_000 / fix_times; + assertThat(cost * 500 < createConfCost); + } + + @Test + public void testCreateCostWithRandomConfig() { + double createConfCost = createConfigCost(); + for (int i = 0; i < 1000; i++) { + createRandomConfig(); + } + int fix_times = 10_000; + long start = System.nanoTime(); + for (int i = 0; i < fix_times; i++) { + createRandomConfig(); + } + double cost = ((double) (System.nanoTime() - start)) / 1000_000 / fix_times; + assertThat(cost * 10 < createConfCost); + } + + private double createConfigCost() { + for (int i = 0; i < 1000; i++) { + createConfig(); + } + int fix_times = 10_000; + long start = System.nanoTime(); + for (int i = 0; i < fix_times; i++) { + createConfig(); + } + return ((double) (System.nanoTime() - start)) / 1000_000 / fix_times; + } + + private void createConfig() { + org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); + conf.set("a", "a"); + } + + private void create() { + Options options = new Options(); + options.setString("haha", "1"); + options.setString("compress", "zlib"); + OrcFileFormat orcFileFormat = + new OrcFileFormatFactory().create(new FormatContext(options, 1024)); + } + + private void createRandomConfig() { + Options options = new Options(); + options.setString("haha", "1"); + options.setString("compress", "zlib"); + options.setString("a", Math.random() + ""); + OrcFileFormat orcFileFormat = + new OrcFileFormatFactory().create(new FormatContext(options, 1024)); + } } From efac5b6fdaffe1fba46db4e3d752f9f1021a0881 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Tue, 12 Nov 
2024 10:52:45 +0000 Subject: [PATCH 15/29] [format] fix checkstyle --- .../paimon/format/orc/OrcFileFormatTest.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java index 216c083eec66..3ca3ae6bc23a 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java @@ -90,12 +90,12 @@ public void testCreateCost() { for (int i = 0; i < 1000; i++) { create(); } - int fix_times = 10_000; + int times = 10_000; long start = System.nanoTime(); - for (int i = 0; i < fix_times; i++) { + for (int i = 0; i < times; i++) { create(); } - double cost = ((double) (System.nanoTime() - start)) / 1000_000 / fix_times; + double cost = ((double) (System.nanoTime() - start)) / 1000_000 / times; assertThat(cost * 500 < createConfCost); } @@ -105,12 +105,12 @@ public void testCreateCostWithRandomConfig() { for (int i = 0; i < 1000; i++) { createRandomConfig(); } - int fix_times = 10_000; + int times = 10_000; long start = System.nanoTime(); - for (int i = 0; i < fix_times; i++) { + for (int i = 0; i < times; i++) { createRandomConfig(); } - double cost = ((double) (System.nanoTime() - start)) / 1000_000 / fix_times; + double cost = ((double) (System.nanoTime() - start)) / 1000_000 / times; assertThat(cost * 10 < createConfCost); } @@ -118,12 +118,12 @@ private double createConfigCost() { for (int i = 0; i < 1000; i++) { createConfig(); } - int fix_times = 10_000; + int times = 10_000; long start = System.nanoTime(); - for (int i = 0; i < fix_times; i++) { + for (int i = 0; i < times; i++) { createConfig(); } - return ((double) (System.nanoTime() - start)) / 1000_000 / fix_times; + return ((double) (System.nanoTime() - start)) / 1000_000 / times; } private void createConfig() { From 15b191042870b77d6f74187712bcc5ef0a996b19 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Tue, 12 Nov 2024 18:58:28 +0800 Subject: [PATCH 16/29] [format] fix version caused error --- .../java/org/apache/paimon/format/orc/OrcFileFormatTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java index 3ca3ae6bc23a..38957240816a 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java @@ -136,7 +136,7 @@ private void create() { options.setString("haha", "1"); options.setString("compress", "zlib"); OrcFileFormat orcFileFormat = - new OrcFileFormatFactory().create(new FormatContext(options, 1024)); + new OrcFileFormatFactory().create(new FormatContext(options, 1024, 1024)); } private void createRandomConfig() { @@ -145,6 +145,6 @@ private void createRandomConfig() { options.setString("compress", "zlib"); options.setString("a", Math.random() + ""); OrcFileFormat orcFileFormat = - new OrcFileFormatFactory().create(new FormatContext(options, 1024)); + new OrcFileFormatFactory().create(new FormatContext(options, 1024, 1024)); } } From e1b3406aa50873c2ec00cdb1d1537369a0a786ef Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Tue, 12 Nov 2024 11:05:07 +0000 Subject: [PATCH 17/29] [core] fix checkstyle --- 
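The timing tests added in patch 14, and the FormatPerformanceTest introduced a little further below, share one warm-up-then-measure pattern; extracted here in simplified form (create() stands in for whatever workload is measured):

    // Warm up first so JIT compilation does not distort the timed loop.
    for (int i = 0; i < 1_000; i++) {
        create();
    }
    int times = 10_000;
    long start = System.nanoTime();
    for (int i = 0; i < times; i++) {
        create();
    }
    // Average cost in milliseconds per call.
    double cost = ((double) (System.nanoTime() - start)) / 1_000_000 / times;

As the later patches in this series concede, absolute multipliers on such measurements are fragile across CI machines, which is why the assertions eventually become plain isLessThan comparisons.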
.../paimon/operation/KeyValueFileStoreScanTest.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java index 8d6ab5aad37c..1d04d2f58f02 100644 --- a/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java @@ -40,7 +40,14 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; From d48fff6100b9581e728af3be51e06108f6074625 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Wed, 13 Nov 2024 06:38:31 +0000 Subject: [PATCH 18/29] [format] add FormatPerformanceTest --- .../paimon/format/FileFormatFactory.java | 3 +- .../org.apache.paimon.factories.Factory | 18 ++++ .../paimon/format/FormatPerformanceTest.java | 101 ++++++++++++++++++ .../paimon/format/orc/OrcFileFormatTest.java | 4 +- 4 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 paimon-format/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory create mode 100644 paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java diff --git a/paimon-common/src/main/java/org/apache/paimon/format/FileFormatFactory.java b/paimon-common/src/main/java/org/apache/paimon/format/FileFormatFactory.java index b726a84f24a2..d377cb2815c2 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/FileFormatFactory.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/FileFormatFactory.java @@ -19,13 +19,14 @@ package org.apache.paimon.format; import org.apache.paimon.annotation.VisibleForTesting; +import org.apache.paimon.factories.Factory; import org.apache.paimon.options.MemorySize; import org.apache.paimon.options.Options; import javax.annotation.Nullable; /** Factory to create {@link FileFormat}. */ -public interface FileFormatFactory { +public interface FileFormatFactory extends Factory { String identifier(); diff --git a/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory b/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory new file mode 100644 index 000000000000..4aa1b23b3b7a --- /dev/null +++ b/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.paimon.format.avro.AvroFileFormatFactory +org.apache.paimon.format.orc.OrcFileFormatFactory +org.apache.paimon.format.parquet.ParquetFileFormatFactory \ No newline at end of file diff --git a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java new file mode 100644 index 000000000000..995c6143e0d0 --- /dev/null +++ b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format; + +import org.apache.paimon.factories.FactoryUtil; +import org.apache.paimon.options.Options; + +import org.junit.Test; + +import java.util.ServiceLoader; + +import static org.assertj.core.api.Assertions.assertThat; + +/** test format performance. */ +public class FormatPerformanceTest { + + @Test + public void testFormatPerformance() { + double objectCacheCost = newFileFormatWithObjectCache(); + double serviceCost = newFileFormatWithServiceLoader(); + double factoryUtilCost = newFileFormatWithFactoryUtil(); + assertThat(objectCacheCost * 30 < serviceCost).isTrue(); + assertThat(objectCacheCost * 30 < factoryUtilCost).isTrue(); + } + + private double newFileFormatWithServiceLoader() { + for (int i = 0; i < 1000; i++) { + loadFromIdentifier(); + } + int times = 10_000; + long start = System.nanoTime(); + int nothing = 0; + for (int i = 0; i < times; i++) { + nothing += loadFromIdentifier(); + } + nothing = 0; + return ((double) (System.nanoTime() - start)) / 1000_000 / times + nothing; + } + + private double newFileFormatWithObjectCache() { + for (int i = 0; i < 1000; i++) { + newFileFormat(); + } + int times = 10_000; + long start = System.nanoTime(); + for (int i = 0; i < times; i++) { + newFileFormat(); + } + return ((double) (System.nanoTime() - start)) / 1000_000 / times; + } + + private double newFileFormatWithFactoryUtil() { + for (int i = 0; i < 1000; i++) { + newFileFormatFromFactoryUtil(); + } + int times = 10_000; + long start = System.nanoTime(); + for (int i = 0; i < times; i++) { + newFileFormatFromFactoryUtil(); + } + return ((double) (System.nanoTime() - start)) / 1000_000 / times; + } + + private int loadFromIdentifier() { + ServiceLoader serviceLoader = + ServiceLoader.load(FileFormatFactory.class, FileFormat.class.getClassLoader()); + int i = 0; + for (FileFormatFactory factory : serviceLoader) { + i++; + } + return i; + } + + private FileFormat newFileFormat() { + FileFormat orc = FileFormat.fromIdentifier("orc", new Options()); + return orc; + } + + @Test + public void newFileFormatFromFactoryUtil() { 
+ FileFormatFactory fileFormatFactory = + FactoryUtil.discoverFactory( + FileFormatFactory.class.getClassLoader(), FileFormatFactory.class, "orc"); + } +} diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java index 38957240816a..9e5769595c32 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java @@ -96,7 +96,7 @@ public void testCreateCost() { create(); } double cost = ((double) (System.nanoTime() - start)) / 1000_000 / times; - assertThat(cost * 500 < createConfCost); + assertThat(cost * 500 < createConfCost).isTrue(); } @Test @@ -111,7 +111,7 @@ public void testCreateCostWithRandomConfig() { createRandomConfig(); } double cost = ((double) (System.nanoTime() - start)) / 1000_000 / times; - assertThat(cost * 10 < createConfCost); + assertThat(cost * 10 < createConfCost).isTrue(); } private double createConfigCost() { From a0efae22cea5eaa69d46dcda66157f508d740c6e Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Wed, 13 Nov 2024 06:51:27 +0000 Subject: [PATCH 19/29] [format][tests] FormatPerformanceTest: relax the speedup threshold from 30x to 10x --- .../java/org/apache/paimon/format/FormatPerformanceTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java index 995c6143e0d0..ae5383aa0d27 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java @@ -35,8 +35,8 @@ public void testFormatPerformance() { double objectCacheCost = newFileFormatWithObjectCache(); double serviceCost = newFileFormatWithServiceLoader(); double factoryUtilCost = newFileFormatWithFactoryUtil(); - assertThat(objectCacheCost * 30 < serviceCost).isTrue(); - assertThat(objectCacheCost * 30 < factoryUtilCost).isTrue(); + assertThat(objectCacheCost * 10 < serviceCost).isTrue(); + assertThat(objectCacheCost * 10 < factoryUtilCost).isTrue(); } private double newFileFormatWithServiceLoader() { From 016620ca6e88a1511f03205c355e3354831463f4 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Wed, 13 Nov 2024 07:05:10 +0000 Subject: [PATCH 20/29] [format][tests] FormatPerformanceTest: assert with isLessThan so the test passes on GitHub CI machines whose performance differs --- .../java/org/apache/paimon/format/FormatPerformanceTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java index ae5383aa0d27..cd72bcc53dd4 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java @@ -35,8 +35,8 @@ public void testFormatPerformance() { double objectCacheCost = newFileFormatWithObjectCache(); double serviceCost = newFileFormatWithServiceLoader(); double factoryUtilCost = newFileFormatWithFactoryUtil(); - assertThat(objectCacheCost * 10 < serviceCost).isTrue(); - assertThat(objectCacheCost * 10 < factoryUtilCost).isTrue(); + assertThat(objectCacheCost).isLessThan(serviceCost); + assertThat(objectCacheCost).isLessThan(factoryUtilCost); } private double 
newFileFormatWithServiceLoader() { From 133a4918e80bd2d98d98536024b9e004a64fc95d Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Thu, 14 Nov 2024 06:19:28 +0000 Subject: [PATCH 21/29] [format] merge conflicts --- .../org/apache/paimon/format/FileFormat.java | 39 ++----- .../operation/AbstractFileStoreScan.java | 6 +- .../paimon/operation/FileStoreScan.java | 3 +- .../table/source/AbstractDataTableScan.java | 3 +- .../paimon/table/source/InnerTableScan.java | 3 +- .../table/source/snapshot/SnapshotReader.java | 3 +- .../source/snapshot/SnapshotReaderImpl.java | 11 +- .../paimon/table/system/AuditLogTable.java | 3 +- .../operation/KeyValueFileStoreScanTest.java | 3 +- ...org.apache.paimon.format.FileFormatFactory | 18 ---- .../paimon/format/FormatPerformanceTest.java | 101 ------------------ 11 files changed, 30 insertions(+), 163 deletions(-) delete mode 100644 paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory delete mode 100644 paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java diff --git a/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java b/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java index ab75f0348584..83c929e94cec 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java @@ -19,8 +19,8 @@ package org.apache.paimon.format; import org.apache.paimon.CoreOptions; +import org.apache.paimon.factories.FactoryUtil; import org.apache.paimon.format.FileFormatFactory.FormatContext; -import org.apache.paimon.fs.ObjectCacheManager; import org.apache.paimon.options.Options; import org.apache.paimon.predicate.Predicate; import org.apache.paimon.statistics.SimpleColStatsCollector; @@ -28,13 +28,11 @@ import javax.annotation.Nullable; -import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.ServiceLoader; /** * Factory class which creates reader and writer factories for specific file format. @@ -43,9 +41,6 @@ */ public abstract class FileFormat { - private static final ObjectCacheManager formatFactoryCache = - ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000); - protected String formatIdentifier; protected FileFormat(String formatIdentifier) { @@ -93,34 +88,12 @@ public static FileFormat fromIdentifier(String identifier, Options options) { /** Create a {@link FileFormat} from format identifier and format options. 
*/ public static FileFormat fromIdentifier(String identifier, FormatContext context) { - return fromIdentifier(identifier, context, FileFormat.class.getClassLoader()) - .orElseThrow( - () -> - new RuntimeException( - String.format( - "Could not find a FileFormatFactory implementation class for %s format", - identifier))); - } - - private static Optional fromIdentifier( - String formatIdentifier, FormatContext context, ClassLoader classLoader) { - FileFormatFactory fileFormatFactory = - formatFactoryCache.getIfPresent(formatIdentifier.toLowerCase()); - if (fileFormatFactory != null) { - return Optional.of(fileFormatFactory.create(context)); - } - - ServiceLoader serviceLoader = - ServiceLoader.load(FileFormatFactory.class, classLoader); - for (FileFormatFactory factory : serviceLoader) { - formatFactoryCache.put(factory.identifier(), factory); - if (factory.identifier().equals(formatIdentifier.toLowerCase())) { - return Optional.of(factory.create(context)); - } - } - - return Optional.empty(); + FactoryUtil.discoverFactory( + FileFormatFactory.class.getClassLoader(), + FileFormatFactory.class, + identifier); + return fileFormatFactory.create(context); } protected Options getIdentifierPrefixOptions(Options options) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java index 99e8b411fbaf..deb6fadd7c19 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java @@ -85,7 +85,7 @@ public abstract class AbstractFileStoreScan implements FileStoreScan { private Snapshot specifiedSnapshot = null; private Filter bucketFilter = null; - private List buckets; + private Collection buckets; private BiFilter totalAwareBucketFilter = null; private List specifiedManifests = null; protected ScanMode scanMode = ScanMode.ALL; @@ -138,7 +138,7 @@ public FileStoreScan withBucket(int bucket) { } @Override - public FileStoreScan withBuckets(List buckets) { + public FileStoreScan withBuckets(Collection buckets) { this.bucketFilter = buckets::contains; this.buckets = buckets; return this; @@ -444,7 +444,7 @@ private Filter createCacheRowFilter() { * *

Implemented to {@link InternalRow} is for performance (No deserialization). */ - private static List createPushDownFilter(List buckets) { + private static List createPushDownFilter(Collection buckets) { if (buckets == null || buckets.isEmpty()) { return null; } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java index 65af1e6fe259..89e7e7aace90 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreScan.java @@ -38,6 +38,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -57,7 +58,7 @@ public interface FileStoreScan { FileStoreScan withBucket(int bucket); - FileStoreScan withBuckets(List buckets); + FileStoreScan withBuckets(Collection buckets); FileStoreScan withBucketFilter(Filter bucketFilter); diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java index 4b071ba617ab..9299b86aa420 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractDataTableScan.java @@ -48,6 +48,7 @@ import org.apache.paimon.utils.Pair; import org.apache.paimon.utils.SnapshotManager; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Optional; @@ -80,7 +81,7 @@ public AbstractDataTableScan withBucketFilter(Filter bucketFilter) { } @Override - public AbstractDataTableScan withBuckets(List buckets) { + public AbstractDataTableScan withBuckets(Collection buckets) { snapshotReader.withBuckets(buckets); return this; } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java index 5dfd83640f5d..6c3b28f0f4ce 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java @@ -23,6 +23,7 @@ import org.apache.paimon.predicate.Predicate; import org.apache.paimon.utils.Filter; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -52,7 +53,7 @@ default InnerTableScan withBucket(Integer bucket) { return withBuckets(Collections.singletonList(bucket)); } - default InnerTableScan withBuckets(List buckets) { + default InnerTableScan withBuckets(Collection buckets) { throw new RuntimeException("not impl withBuckets for " + this.getClass().getName()); } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java index 76a550ba4ce6..026e529dc0c8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java @@ -39,6 +39,7 @@ import javax.annotation.Nullable; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -81,7 +82,7 @@ public interface SnapshotReader { SnapshotReader withBucket(int bucket); - SnapshotReader withBuckets(List buckets); + 
SnapshotReader withBuckets(Collection buckets); SnapshotReader withBucketFilter(Filter bucketFilter); diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java index 74f257ddcf9d..c3f027da962f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java @@ -53,6 +53,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -246,7 +247,7 @@ public SnapshotReader withBucket(int bucket) { return this; } - public SnapshotReader withBuckets(List buckets) { + public SnapshotReader withBuckets(Collection buckets) { scan.withBuckets(buckets); return this; } @@ -277,7 +278,13 @@ public SnapshotReader withShard(int indexOfThisSubtask, int numberOfParallelSubt Math.abs(file.hashCode() % numberOfParallelSubtasks) == indexOfThisSubtask); } else { - withBucketFilter(bucket -> bucket % numberOfParallelSubtasks == indexOfThisSubtask); + Set buckets = new HashSet<>(); + for (int bucket = 0; bucket < this.tableSchema.numBuckets(); bucket++) { + if (bucket % numberOfParallelSubtasks == indexOfThisSubtask) { + buckets.add(bucket); + } + } + withBuckets(buckets); } return this; } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java index a11e88074932..2e1d913648e7 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java @@ -69,6 +69,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -331,7 +332,7 @@ public SnapshotReader withBucket(int bucket) { } @Override - public SnapshotReader withBuckets(List buckets) { + public SnapshotReader withBuckets(Collection buckets) { wrapped.withBuckets(buckets); return this; } diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java index 1d04d2f58f02..07b2fc48e078 100644 --- a/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/operation/KeyValueFileStoreScanTest.java @@ -235,7 +235,8 @@ public void testWithBuckets() throws Exception { int wantedBucket1 = random.nextInt(NUM_BUCKETS); int wantedBucket2 = random.nextInt(NUM_BUCKETS); int wantedBucket3 = random.nextInt(NUM_BUCKETS); - List buckets = Arrays.asList(wantedBucket1, wantedBucket2, wantedBucket3); + Set buckets = + new HashSet<>(Arrays.asList(wantedBucket1, wantedBucket2, wantedBucket3)); FileStoreScan scan = store.newScan(); scan.withSnapshot(snapshot.id()); diff --git a/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory b/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory deleted file mode 100644 index 7af6f79b3493..000000000000 --- a/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache 
Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -org.apache.paimon.format.avro.AvroFileFormatFactory -org.apache.paimon.format.orc.OrcFileFormatFactory -org.apache.paimon.format.parquet.ParquetFileFormatFactory diff --git a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java b/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java deleted file mode 100644 index cd72bcc53dd4..000000000000 --- a/paimon-format/src/test/java/org/apache/paimon/format/FormatPerformanceTest.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.paimon.format; - -import org.apache.paimon.factories.FactoryUtil; -import org.apache.paimon.options.Options; - -import org.junit.Test; - -import java.util.ServiceLoader; - -import static org.assertj.core.api.Assertions.assertThat; - -/** test format performance. 
*/ -public class FormatPerformanceTest { - - @Test - public void testFormatPerformance() { - double objectCacheCost = newFileFormatWithObjectCache(); - double serviceCost = newFileFormatWithServiceLoader(); - double factoryUtilCost = newFileFormatWithFactoryUtil(); - assertThat(objectCacheCost).isLessThan(serviceCost); - assertThat(objectCacheCost).isLessThan(factoryUtilCost); - } - - private double newFileFormatWithServiceLoader() { - for (int i = 0; i < 1000; i++) { - loadFromIdentifier(); - } - int times = 10_000; - long start = System.nanoTime(); - int nothing = 0; - for (int i = 0; i < times; i++) { - nothing += loadFromIdentifier(); - } - nothing = 0; - return ((double) (System.nanoTime() - start)) / 1000_000 / times + nothing; - } - - private double newFileFormatWithObjectCache() { - for (int i = 0; i < 1000; i++) { - newFileFormat(); - } - int times = 10_000; - long start = System.nanoTime(); - for (int i = 0; i < times; i++) { - newFileFormat(); - } - return ((double) (System.nanoTime() - start)) / 1000_000 / times; - } - - private double newFileFormatWithFactoryUtil() { - for (int i = 0; i < 1000; i++) { - newFileFormatFromFactoryUtil(); - } - int times = 10_000; - long start = System.nanoTime(); - for (int i = 0; i < times; i++) { - newFileFormatFromFactoryUtil(); - } - return ((double) (System.nanoTime() - start)) / 1000_000 / times; - } - - private int loadFromIdentifier() { - ServiceLoader serviceLoader = - ServiceLoader.load(FileFormatFactory.class, FileFormat.class.getClassLoader()); - int i = 0; - for (FileFormatFactory factory : serviceLoader) { - i++; - } - return i; - } - - private FileFormat newFileFormat() { - FileFormat orc = FileFormat.fromIdentifier("orc", new Options()); - return orc; - } - - @Test - public void newFileFormatFromFactoryUtil() { - FileFormatFactory fileFormatFactory = - FactoryUtil.discoverFactory( - FileFormatFactory.class.getClassLoader(), FileFormatFactory.class, "orc"); - } -} From 282a2c99ca34935a3a74f5d3a141e689fa828a36 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Thu, 14 Nov 2024 06:59:40 +0000 Subject: [PATCH 22/29] [format] id to lowercase --- .../src/main/java/org/apache/paimon/format/FileFormat.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java b/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java index 83c929e94cec..9ddb58257324 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/FileFormat.java @@ -88,6 +88,9 @@ public static FileFormat fromIdentifier(String identifier, Options options) { /** Create a {@link FileFormat} from format identifier and format options. 
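As the hunk below shows, fromIdentifier now lower-cases the identifier before handing it to FactoryUtil.discoverFactory, making format lookup case-insensitive. A usage sketch (illustrative):

    // After this patch, "ORC" and "orc" resolve to the same factory.
    FileFormat upper = FileFormat.fromIdentifier("ORC", new Options());
    FileFormat lower = FileFormat.fromIdentifier("orc", new Options());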
*/ public static FileFormat fromIdentifier(String identifier, FormatContext context) { + if (identifier != null) { + identifier = identifier.toLowerCase(); + } FileFormatFactory fileFormatFactory = FactoryUtil.discoverFactory( FileFormatFactory.class.getClassLoader(), From 884d12f772d7e56807a3a2826a0c8d7080e0dc93 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Thu, 14 Nov 2024 07:42:34 +0000 Subject: [PATCH 23/29] [tests] core org.apache.paimon.factories.Factory --- .../services/org.apache.paimon.factories.Factory | 1 + .../org.apache.paimon.format.FileFormatFactory | 16 ---------------- 2 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 paimon-core/src/test/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory diff --git a/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.factories.Factory b/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.factories.Factory index 7eb517ab9835..5e87f5e9ea2e 100644 --- a/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.factories.Factory +++ b/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.factories.Factory @@ -13,4 +13,5 @@ # See the License for the specific language governing permissions and # limitations under the License. +org.apache.paimon.format.FileStatsExtractingAvroFormatFactory org.apache.paimon.mergetree.compact.aggregate.TestCustomAggFactory \ No newline at end of file diff --git a/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory b/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory deleted file mode 100644 index 14386c45f21b..000000000000 --- a/paimon-core/src/test/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -org.apache.paimon.format.FileStatsExtractingAvroFormatFactory From a1cc9f46ffccfbe886ec11d825310d4410f32b46 Mon Sep 17 00:00:00 2001 From: "ranxianglei.rxl" Date: Thu, 14 Nov 2024 16:38:38 +0800 Subject: [PATCH 24/29] [tests] add fileFormat factories to paimon-flink-common --- .../org.apache.paimon.factories.Factory | 5 +++++ .../org.apache.paimon.format.FileFormatFactory | 18 ------------------ 2 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory diff --git a/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory b/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory index 0ff3ac1f1e1c..e5d063979da6 100644 --- a/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory +++ b/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.factories.Factory @@ -80,3 +80,8 @@ org.apache.paimon.flink.procedure.MarkPartitionDoneProcedure org.apache.paimon.flink.procedure.CloneProcedure org.apache.paimon.flink.procedure.CompactManifestProcedure org.apache.paimon.flink.procedure.RefreshObjectTableProcedure + +### fileFormat factories +org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat$OrcFactory +org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat$ParquetFactory +org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat$AvroFactory diff --git a/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory b/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory deleted file mode 100644 index 6e7553d5c668..000000000000 --- a/paimon-flink/paimon-flink-common/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
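The service-file consolidation above relies on identifier-based discovery over java.util.ServiceLoader. A minimal sketch of that mechanism, assuming only that every factory exposes an identifier() method; the interface and helper names are illustrative, and the project's FactoryUtil may differ in details such as caching and error reporting:

    import java.util.ServiceLoader;

    interface DiscoverableFactory {
        String identifier();
    }

    final class Discovery {
        /** Returns the first registered factory whose identifier matches, scanning META-INF/services. */
        static <T extends DiscoverableFactory> T discover(Class<T> base, String identifier) {
            for (T factory : ServiceLoader.load(base, base.getClassLoader())) {
                if (factory.identifier().equals(identifier)) {
                    return factory;
                }
            }
            throw new IllegalArgumentException("No factory registered for identifier: " + identifier);
        }
    }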
- -org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat$OrcFactory -org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat$ParquetFactory -org.apache.paimon.flink.compact.changelog.format.CompactedChangelogReadOnlyFormat$AvroFactory From e401844047ac742e8cc2ebd64326735ae3454989 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Fri, 15 Nov 2024 07:20:44 +0000 Subject: [PATCH 25/29] [core] implement withBuckets for fallback and system table scans --- .../operation/AbstractFileStoreScan.java | 5 ++--- .../table/FallbackReadFileStoreTable.java | 9 +++++++++ .../paimon/table/source/InnerTableScan.java | 3 ++- .../paimon/table/system/AuditLogTable.java | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java index deb6fadd7c19..306fd6f5a7c8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreScan.java @@ -440,9 +440,8 @@ private Filter createCacheRowFilter() { } /** - * Read the corresponding entries based on the current required partition and bucket. - - *

 Implemented to {@link InternalRow} is for performance (No deserialization). + * Read the corresponding entries based on the current required buckets, and push the bucket + * filter down to the file format. */ private static List createPushDownFilter(Collection buckets) { if (buckets == null || buckets.isEmpty()) { diff --git a/paimon-core/src/main/java/org/apache/paimon/table/FallbackReadFileStoreTable.java b/paimon-core/src/main/java/org/apache/paimon/table/FallbackReadFileStoreTable.java index f1a60b9713f9..663933f337b5 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/FallbackReadFileStoreTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/FallbackReadFileStoreTable.java @@ -34,6 +34,7 @@ import org.apache.paimon.table.source.DataSplit; import org.apache.paimon.table.source.DataTableScan; import org.apache.paimon.table.source.InnerTableRead; +import org.apache.paimon.table.source.InnerTableScan; import org.apache.paimon.table.source.Split; import org.apache.paimon.table.source.TableRead; import org.apache.paimon.table.source.TableScan; @@ -44,6 +45,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -266,6 +268,13 @@ public Scan withBucketFilter(Filter bucketFilter) { return this; } + @Override + public InnerTableScan withBuckets(Collection buckets) { + mainScan.withBuckets(buckets); + fallbackScan.withBuckets(buckets); + return this; + } + @Override public Scan withLevelFilter(Filter levelFilter) { mainScan.withLevelFilter(levelFilter); diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java index 6c3b28f0f4ce..1e17e001694e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerTableScan.java @@ -54,7 +54,8 @@ default InnerTableScan withBucket(Integer bucket) { } default InnerTableScan withBuckets(Collection buckets) { - throw new RuntimeException("not impl withBuckets for " + this.getClass().getName()); + // Returning this is not strictly safe, because many implementations override neither this method nor withBucketFilter; a safer delegating default is sketched after this patch. + return this; } default InnerTableScan withLevelFilter(Filter levelFilter) { diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java index 2e1d913648e7..5105f6fc8fd2 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java @@ -441,6 +441,12 @@ public InnerTableScan withBucketFilter(Filter bucketFilter) { return this; } + @Override + public InnerTableScan withBuckets(Collection buckets) { + batchScan.withBuckets(buckets); + return this; + } + @Override public InnerTableScan withLevelFilter(Filter levelFilter) { batchScan.withLevelFilter(levelFilter); @@ -478,6 +484,18 @@ public StreamDataTableScan withFilter(Predicate predicate) { return this; } + @Override + public InnerTableScan withBucketFilter(Filter bucketFilter) { + streamScan.withBucketFilter(bucketFilter); + return this; + } + + @Override + public InnerTableScan withBuckets(Collection buckets) { + streamScan.withBuckets(buckets); + return this; + } + @Override public StartingContext startingContext() { return streamScan.startingContext(); From 710af068b40656a151a1b7d4faa91043d49318d7
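On the withBuckets default above: rather than returning this and risking a silently dropped bucket filter, a default could delegate to withBucketFilter, which most scans already honor. A sketch of that alternative, assuming the collection holds bucket ids and withBucketFilter accepts a predicate over them; this is hypothetical, not the committed behavior:

    // Hypothetical safer default for InnerTableScan; the patch above instead returns this.
    default InnerTableScan withBuckets(java.util.Collection<Integer> buckets) {
        java.util.Set<Integer> bucketSet = new java.util.HashSet<>(buckets);
        // Delegation keeps bucket pruning effective wherever withBucketFilter is
        // implemented, instead of silently ignoring the requested buckets.
        return withBucketFilter(bucketSet::contains);
    }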
Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Fri, 15 Nov 2024 07:49:32 +0000 Subject: [PATCH 26/29] [format] no need to call rowMapper under getArray --- .../apache/paimon/format/orc/reader/OrcArrayColumnVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java index 25a1935f3e4b..6aeb8c98892e 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcArrayColumnVector.java @@ -42,7 +42,7 @@ public OrcArrayColumnVector( @Override public InternalArray getArray(int i) { - i = rowMapper(i); + // No need to call rowMapper(i) here. long offset = hiveVector.offsets[i]; long length = hiveVector.lengths[i]; return new ColumnarArray(paimonVector, (int) offset, (int) length); From 669dc30770f70a4103300eb6df607fd1038a3553 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 18 Nov 2024 04:05:50 +0000 Subject: [PATCH 27/29] [core] remove manifest format factory cache for now --- .../apache/paimon/manifest/ManifestFile.java | 48 +------------------ 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java index 541cf406472e..41b480e8a427 100644 --- a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java +++ b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java @@ -24,7 +24,6 @@ import org.apache.paimon.format.FormatWriterFactory; import org.apache.paimon.format.SimpleStatsCollector; import org.apache.paimon.fs.FileIO; -import org.apache.paimon.fs.ObjectCacheManager; import org.apache.paimon.fs.Path; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.io.SingleFileWriter; @@ -41,9 +40,7 @@ import javax.annotation.Nullable; import java.io.IOException; -import java.time.Duration; import java.util.List; -import java.util.Objects; /** * This file includes several {@link ManifestEntry}s, representing the additional changes since last @@ -204,57 +201,16 @@ public ManifestFile create() { return create(null); } - private static class FormatReaderFactoryKey { - private final RowType entryType; - private final List filters; - private final String formatIdentifier; - - public FormatReaderFactoryKey( - String formatIdentifier, RowType entryType, List filters) { - this.entryType = entryType; - this.filters = filters; - this.formatIdentifier = formatIdentifier; - } - - @Override - public boolean equals(Object o) { - if (o == null || getClass() != o.getClass()) { - return false; - } - FormatReaderFactoryKey that = (FormatReaderFactoryKey) o; - return Objects.equals(entryType, that.entryType) - && Objects.equals(filters, that.filters) - && Objects.equals(formatIdentifier, that.formatIdentifier); - } - - @Override - public int hashCode() { - return Objects.hash(entryType, filters, formatIdentifier); - } - } - - private static final ObjectCacheManager - readers = ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000); - private static final ObjectCacheManager - writers = ObjectCacheManager.newObjectCacheManager(Duration.ofDays(365), 1000); - public ManifestFile create(List filters) { - String formatIdentifier = this.fileFormat.getFormatIdentifier(); RowType entryType = 
VersionedObjectSerializer.versionType(ManifestEntry.SCHEMA); - FormatReaderFactoryKey formatReaderFactoryKey = - new FormatReaderFactoryKey(formatIdentifier, entryType, filters); return new ManifestFile( fileIO, schemaManager, partitionType, ManifestEntrySerializer.getInstance(), entryType, - readers.get( - formatReaderFactoryKey, - (ignore) -> fileFormat.createReaderFactory(entryType, filters)), - writers.get( - formatReaderFactoryKey, - (ignore) -> fileFormat.createWriterFactory(entryType)), + fileFormat.createReaderFactory(entryType, filters), + fileFormat.createWriterFactory(entryType), compression, pathFactory.manifestFileFactory(), suggestedFileSize, From 230847547ffab5dc81919abbd6448bb30a927d19 Mon Sep 17 00:00:00 2001 From: "aiden.dong" <782112163@qq.com> Date: Tue, 3 Dec 2024 13:33:44 +0800 Subject: [PATCH 28/29] [core] Optimization of Parquet Predicate Pushdown Capability (#4608) --- .../table/PrimaryKeyFileStoreTableTest.java | 63 ++++++ .../format/parquet/ParquetReaderFactory.java | 66 +++++- .../parquet/reader/AbstractColumnReader.java | 204 +++++++++++++----- .../parquet/reader/BooleanColumnReader.java | 36 +++- .../parquet/reader/ByteColumnReader.java | 39 +++- .../parquet/reader/BytesColumnReader.java | 41 +++- .../parquet/reader/DoubleColumnReader.java | 38 +++- .../reader/FixedLenBytesColumnReader.java | 36 +++- .../parquet/reader/FloatColumnReader.java | 38 +++- .../parquet/reader/IntColumnReader.java | 39 +++- .../parquet/reader/LongColumnReader.java | 39 +++- .../parquet/reader/NestedColumnReader.java | 2 +- .../reader/NestedPrimitiveColumnReader.java | 141 +++++++----- .../parquet/reader/ParquetReadState.java | 148 +++++++++++++ .../reader/ParquetSplitReaderUtil.java | 41 ++-- .../parquet/reader/RunLengthDecoder.java | 45 ++++ .../parquet/reader/ShortColumnReader.java | 38 +++- .../parquet/reader/TimestampColumnReader.java | 15 +- 18 files changed, 898 insertions(+), 171 deletions(-) create mode 100644 paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetReadState.java diff --git a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java index dca86aa61ec2..e65db5ce015e 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java @@ -84,6 +84,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; @@ -826,6 +827,68 @@ public void testDeletionVectorsWithFileIndexInFile() throws Exception { "1|4|500|binary|varbinary|mapKey:mapVal|multiset")); } + @Test + public void testDeletionVectorsWithParquetFilter() throws Exception { + FileStoreTable table = + createFileStoreTable( + conf -> { + conf.set(BUCKET, 1); + conf.set(DELETION_VECTORS_ENABLED, true); + conf.set(FILE_FORMAT, "parquet"); + conf.set("parquet.block.size", "1048576"); + conf.set("parquet.page.size", "1024"); + }); + + BatchWriteBuilder writeBuilder = table.newBatchWriteBuilder(); + + BatchTableWrite write = + (BatchTableWrite) + writeBuilder + .newWrite() + .withIOManager(new IOManagerImpl(tempDir.toString())); + + for (int i = 0; i < 200000; i++) { + write.write(rowData(1, i, i * 100L)); + } + + List messages = write.prepareCommit(); + BatchTableCommit commit = 
writeBuilder.newCommit(); + commit.commit(messages); + write = + (BatchTableWrite) + writeBuilder + .newWrite() + .withIOManager(new IOManagerImpl(tempDir.toString())); + for (int i = 180000; i < 200000; i++) { + write.write(rowDataWithKind(RowKind.DELETE, 1, i, i * 100L)); + } + + messages = write.prepareCommit(); + commit = writeBuilder.newCommit(); + commit.commit(messages); + + PredicateBuilder builder = new PredicateBuilder(ROW_TYPE); + List splits = toSplits(table.newSnapshotReader().read().dataSplits()); + Random random = new Random(); + + for (int i = 0; i < 10; i++) { + int value = random.nextInt(180000); + TableRead read = table.newRead().withFilter(builder.equal(1, value)).executeFilter(); + assertThat(getResult(read, splits, BATCH_ROW_TO_STRING)) + .isEqualTo( + Arrays.asList( + String.format( + "%d|%d|%d|binary|varbinary|mapKey:mapVal|multiset", + 1, value, value * 100L))); + } + + for (int i = 0; i < 10; i++) { + int value = 180000 + random.nextInt(20000); + TableRead read = table.newRead().withFilter(builder.equal(1, value)).executeFilter(); + assertThat(getResult(read, splits, BATCH_ROW_TO_STRING)).isEmpty(); + } + } + @Test public void testDeletionVectorsWithFileIndexInMeta() throws Exception { FileStoreTable table = diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java index 2a62c0bc8947..357a8a0a539e 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java @@ -27,6 +27,7 @@ import org.apache.paimon.format.FormatReaderFactory; import org.apache.paimon.format.parquet.reader.ColumnReader; import org.apache.paimon.format.parquet.reader.ParquetDecimalVector; +import org.apache.paimon.format.parquet.reader.ParquetReadState; import org.apache.paimon.format.parquet.reader.ParquetTimestampVector; import org.apache.paimon.format.parquet.type.ParquetField; import org.apache.paimon.fs.Path; @@ -129,7 +130,7 @@ public ParquetReader createReader(FormatReaderFactory.Context context) throws IO buildFieldsList(projectedType.getFields(), projectedType.getFieldNames(), columnIO); return new ParquetReader( - reader, requestedSchema, reader.getRecordCount(), poolOfBatches, fields); + reader, requestedSchema, reader.getFilteredRecordCount(), poolOfBatches, fields); } private void setReadOptions(ParquetReadOptions.Builder builder) { @@ -332,6 +333,10 @@ private class ParquetReader implements RecordReader { private long nextRowPosition; + private ParquetReadState currentRowGroupReadState; + + private long currentRowGroupFirstRowIndex; + /** * For each request column, the reader to read this column. This is NULL if this column is * missing from the file, in which case we populate the attribute with NULL. 
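The ParquetReaderFactory changes that follow switch the reader onto readNextFilteredRowGroup()/getFilteredRecordCount(), so a batch may only cover rows admitted by the current row range. A toy model of the clipping arithmetic used further down in getBatchSize and getNextRowPosition — illustrative only, with an inclusive range end, not the actual reader code:

    // Illustrative only: how many rows a batch may take from the current admitted range.
    // rowId is the next row to read; [rangeStart, rangeEnd] is an inclusive range of
    // rows that survived the column-index filter.
    static int clipBatch(long rowId, long rangeStart, long rangeEnd, int batchSize) {
        if (rowId < rangeStart) {
            return 0; // rows before the range must be skipped rather than read
        }
        long leftInRange = rangeEnd - rowId + 1;
        return (int) Math.min(batchSize, leftInRange);
    }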
@@ -355,6 +360,7 @@ private ParquetReader( this.totalCountLoadedSoFar = 0; this.currentRowPosition = 0; this.nextRowPosition = 0; + this.currentRowGroupFirstRowIndex = 0; this.fields = fields; } @@ -386,7 +392,8 @@ private boolean nextBatch(ParquetReaderBatch batch) throws IOException { currentRowPosition = nextRowPosition; } - int num = (int) Math.min(batchSize, totalCountLoadedSoFar - rowsReturned); + int num = getBatchSize(); + for (int i = 0; i < columnReaders.length; ++i) { if (columnReaders[i] == null) { batch.writableVectors[i].fillWithNulls(); @@ -396,13 +403,13 @@ private boolean nextBatch(ParquetReaderBatch batch) throws IOException { } } rowsReturned += num; - nextRowPosition = currentRowPosition + num; + nextRowPosition = getNextRowPosition(num); batch.columnarBatch.setNumRows(num); return true; } private void readNextRowGroup() throws IOException { - PageReadStore rowGroup = reader.readNextRowGroup(); + PageReadStore rowGroup = reader.readNextFilteredRowGroup(); if (rowGroup == null) { throw new IOException( "expecting more rows but reached last block. Read " + rowsReturned + " out of " + totalRowCount); } + this.currentRowGroupReadState = + new ParquetReadState(rowGroup.getRowIndexes().orElse(null)); + List types = requestedSchema.getFields(); columnReaders = new ColumnReader[types.size()]; for (int i = 0; i < types.size(); ++i) { @@ -425,18 +435,62 @@ private void readNextRowGroup() throws IOException { 0); } } + totalCountLoadedSoFar += rowGroup.getRowCount(); - if (rowGroup.getRowIndexOffset().isPresent()) { - currentRowPosition = rowGroup.getRowIndexOffset().get(); + + if (rowGroup.getRowIndexOffset().isPresent()) { // filter + currentRowGroupFirstRowIndex = rowGroup.getRowIndexOffset().get(); + long pageIndex = 0; + if (!this.currentRowGroupReadState.isMaxRange()) { + pageIndex = this.currentRowGroupReadState.currentRangeStart(); + } + currentRowPosition = currentRowGroupFirstRowIndex + pageIndex; } else { if (reader.rowGroupsFiltered()) { throw new RuntimeException( "There is a bug, rowIndexOffset must be present when row groups are filtered."); } + currentRowGroupFirstRowIndex = nextRowPosition; currentRowPosition = nextRowPosition; } } + private int getBatchSize() throws IOException { + + long rangeBatchSize = Long.MAX_VALUE; + if (this.currentRowGroupReadState.isFinished()) { + throw new IOException( + "expecting more rows but reached last page block. 
Read " + + rowsReturned + + " out of " + + totalRowCount); + } else if (!this.currentRowGroupReadState.isMaxRange()) { + long pageIndex = this.currentRowPosition - this.currentRowGroupFirstRowIndex; + rangeBatchSize = this.currentRowGroupReadState.currentRangeEnd() - pageIndex + 1; + } + + return (int) + Math.min( + batchSize, + Math.min(rangeBatchSize, totalCountLoadedSoFar - rowsReturned)); + } + + private long getNextRowPosition(int num) { + if (this.currentRowGroupReadState.isMaxRange()) { + return this.currentRowPosition + num; + } else { + long pageIndex = this.currentRowPosition - this.currentRowGroupFirstRowIndex; + long nextIndex = pageIndex + num; + + if (this.currentRowGroupReadState.currentRangeEnd() < nextIndex) { + this.currentRowGroupReadState.nextRange(); + nextIndex = this.currentRowGroupReadState.currentRangeStart(); + } + + return nextIndex; + } + } + private ParquetReaderBatch getCachedEntry() throws IOException { try { return pool.pollEntry(); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java index 7e2ab6d5e7f0..5e3f4a7e6a33 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java @@ -32,6 +32,7 @@ import org.apache.parquet.column.page.DataPageV1; import org.apache.parquet.column.page.DataPageV2; import org.apache.parquet.column.page.DictionaryPage; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.column.page.PageReader; import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.ParquetDecodingException; @@ -65,20 +66,16 @@ public abstract class AbstractColumnReader protected final ColumnDescriptor descriptor; - /** Total number of values read. */ - private long valuesRead; - - /** - * value that indicates the end of the current page. That is, if valuesRead == - * endOfPageValueCount, we are at the end of the page. - */ - private long endOfPageValueCount; - /** If true, the current page is dictionary encoded. */ private boolean isCurrentPageDictionaryEncoded; /** Total values in the current page. */ - private int pageValueCount; + // private int pageValueCount; + + /** + * Helper struct to track intermediate states while reading Parquet pages in the column chunk. + */ + private final ParquetReadState readState; /* * Input streams: @@ -101,12 +98,14 @@ public abstract class AbstractColumnReader /** Dictionary decoder to wrap dictionary ids input stream. 
*/ private RunLengthDecoder dictionaryIdsDecoder; - public AbstractColumnReader(ColumnDescriptor descriptor, PageReader pageReader) + public AbstractColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) throws IOException { this.descriptor = descriptor; - this.pageReader = pageReader; + this.pageReader = pageReadStore.getPageReader(descriptor); this.maxDefLevel = descriptor.getMaxDefinitionLevel(); + this.readState = new ParquetReadState(pageReadStore.getRowIndexes().orElse(null)); + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); if (dictionaryPage != null) { try { @@ -147,56 +146,136 @@ public final void readToVector(int readNumber, VECTOR vector) throws IOException if (dictionary != null) { dictionaryIds = vector.reserveDictionaryIds(readNumber); } - while (readNumber > 0) { + + readState.resetForNewBatch(readNumber); + + while (readState.rowsToReadInBatch > 0) { // Compute the number of values we want to read in this page. - int leftInPage = (int) (endOfPageValueCount - valuesRead); - if (leftInPage == 0) { - DataPage page = pageReader.readPage(); - if (page instanceof DataPageV1) { - readPageV1((DataPageV1) page); - } else if (page instanceof DataPageV2) { - readPageV2((DataPageV2) page); - } else { - throw new RuntimeException("Unsupported page type: " + page.getClass()); + if (readState.valuesToReadInPage == 0) { + int pageValueCount = readPage(); + if (pageValueCount < 0) { + // we've read all the pages; this could happen when we're reading a repeated + // list and we + // don't know where the list will end until we've seen all the pages. + break; } - leftInPage = (int) (endOfPageValueCount - valuesRead); } - int num = Math.min(readNumber, leftInPage); - if (isCurrentPageDictionaryEncoded) { - // Read and decode dictionary ids. - runLenDecoder.readDictionaryIds( - num, dictionaryIds, vector, rowId, maxDefLevel, this.dictionaryIdsDecoder); - - if (vector.hasDictionary() || (rowId == 0 && supportLazyDecode())) { - // Column vector supports lazy decoding of dictionary values so just set the - // dictionary. - // We can't do this if rowId != 0 AND the column doesn't have a dictionary (i.e. - // some - // non-dictionary encoded values have already been added). - vector.setDictionary(new ParquetDictionary(dictionary)); + + if (readState.isFinished()) { + break; + } + + long pageRowId = readState.rowId; + int leftInBatch = readState.rowsToReadInBatch; + int leftInPage = readState.valuesToReadInPage; + + int readBatch = Math.min(leftInBatch, leftInPage); + + long rangeStart = readState.currentRangeStart(); + long rangeEnd = readState.currentRangeEnd(); + + if (pageRowId < rangeStart) { + int toSkip = (int) (rangeStart - pageRowId); + if (toSkip >= leftInPage) { // drop page + pageRowId += leftInPage; + leftInPage = 0; } else { - readBatchFromDictionaryIds(rowId, num, vector, dictionaryIds); + if (isCurrentPageDictionaryEncoded) { + runLenDecoder.skipDictionaryIds( + toSkip, maxDefLevel, this.dictionaryIdsDecoder); + pageRowId += toSkip; + leftInPage -= toSkip; + } else { + skipBatch(toSkip); + pageRowId += toSkip; + leftInPage -= toSkip; + } } + } else if (pageRowId > rangeEnd) { + readState.nextRange(); } else { - if (vector.hasDictionary() && rowId != 0) { - // This batch already has dictionary encoded values but this new page is not. - // The batch - // does not support a mix of dictionary and not so we will decode the - // dictionary. 
- readBatchFromDictionaryIds(0, rowId, vector, vector.getDictionaryIds()); + long start = pageRowId; + long end = Math.min(rangeEnd, pageRowId + readBatch - 1); + int num = (int) (end - start + 1); + + if (isCurrentPageDictionaryEncoded) { + // Read and decode dictionary ids. + runLenDecoder.readDictionaryIds( + num, + dictionaryIds, + vector, + rowId, + maxDefLevel, + this.dictionaryIdsDecoder); + + if (vector.hasDictionary() || (rowId == 0 && supportLazyDecode())) { + // Column vector supports lazy decoding of dictionary values so just set the + // dictionary. + // We can't do this if rowId != 0 AND the column doesn't have a dictionary + // (i.e. + // some + // non-dictionary encoded values have already been added). + vector.setDictionary(new ParquetDictionary(dictionary)); + } else { + readBatchFromDictionaryIds(rowId, num, vector, dictionaryIds); + } + } else { + if (vector.hasDictionary() && rowId != 0) { + // This batch already has dictionary encoded values but this new page is + // not. + // The batch + // does not support a mix of dictionary and not so we will decode the + // dictionary. + readBatchFromDictionaryIds(0, rowId, vector, vector.getDictionaryIds()); + } + vector.setDictionary(null); + readBatch(rowId, num, vector); } - vector.setDictionary(null); - readBatch(rowId, num, vector); + leftInBatch -= num; + pageRowId += num; + leftInPage -= num; + rowId += num; } + readState.rowsToReadInBatch = leftInBatch; + readState.valuesToReadInPage = leftInPage; + readState.rowId = pageRowId; + } + } - valuesRead += num; - rowId += num; - readNumber -= num; + private int readPage() { + DataPage page = pageReader.readPage(); + if (page == null) { + return -1; } + long pageFirstRowIndex = page.getFirstRowIndex().orElse(0L); + + int pageValueCount = + page.accept( + new DataPage.Visitor() { + @Override + public Integer visit(DataPageV1 dataPageV1) { + try { + return readPageV1(dataPageV1); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public Integer visit(DataPageV2 dataPageV2) { + try { + return readPageV2(dataPageV2); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }); + readState.resetForNewPage(pageValueCount, pageFirstRowIndex); + return pageValueCount; } - private void readPageV1(DataPageV1 page) throws IOException { - this.pageValueCount = page.getValueCount(); + private int readPageV1(DataPageV1 page) throws IOException { + int pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); // Initialize the decoders. @@ -211,30 +290,31 @@ private void readPageV1(DataPageV1 page) throws IOException { ByteBufferInputStream in = bytes.toInputStream(); rlReader.initFromPage(pageValueCount, in); this.runLenDecoder.initFromStream(pageValueCount, in); - prepareNewPage(page.getValueEncoding(), in); + prepareNewPage(page.getValueEncoding(), in, pageValueCount); + return pageValueCount; } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } - private void readPageV2(DataPageV2 page) throws IOException { - this.pageValueCount = page.getValueCount(); + private int readPageV2(DataPageV2 page) throws IOException { + int pageValueCount = page.getValueCount(); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. 
this.runLenDecoder = new RunLengthDecoder(bitWidth, false); this.runLenDecoder.initFromStream( - this.pageValueCount, page.getDefinitionLevels().toInputStream()); + pageValueCount, page.getDefinitionLevels().toInputStream()); try { - prepareNewPage(page.getDataEncoding(), page.getData().toInputStream()); + prepareNewPage(page.getDataEncoding(), page.getData().toInputStream(), pageValueCount); + return pageValueCount; } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } - private void prepareNewPage(Encoding dataEncoding, ByteBufferInputStream in) + private void prepareNewPage(Encoding dataEncoding, ByteBufferInputStream in, int pageValueCount) throws IOException { - this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { if (dictionary == null) { throw new IOException( @@ -269,6 +349,14 @@ private void prepareNewPage(Encoding dataEncoding, ByteBufferInputStream in) afterReadPage(); } + final void skipDataBuffer(int length) { + try { + dataInputStream.skipFully(length); + } catch (IOException e) { + throw new ParquetDecodingException("Failed to skip " + length + " bytes", e); + } + } + final ByteBuffer readDataBuffer(int length) { try { return dataInputStream.slice(length).order(ByteOrder.LITTLE_ENDIAN); @@ -291,6 +379,8 @@ protected boolean supportLazyDecode() { /** Read batch from {@link #runLenDecoder} and {@link #dataInputStream}. */ protected abstract void readBatch(int rowId, int num, VECTOR column); + protected abstract void skipBatch(int num); + /** * Decode dictionary ids to data. From {@link #runLenDecoder} and {@link #dictionaryIdsDecoder}. */ diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java index d5dc231d8436..83d3c5a07d4b 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java @@ -22,7 +22,7 @@ import org.apache.paimon.data.columnar.writable.WritableIntVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.schema.PrimitiveType; @@ -36,9 +36,9 @@ public class BooleanColumnReader extends AbstractColumnReader 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + for (int i = 0; i < n; i++) { + readBoolean(); + } + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + readBoolean(); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + private boolean readBoolean() { if (bitOffset == 0) { try { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ByteColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ByteColumnReader.java index bed9923d9be3..804b8bc0275e 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ByteColumnReader.java +++ 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ByteColumnReader.java @@ -22,7 +22,7 @@ import org.apache.paimon.data.columnar.writable.WritableIntVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.schema.PrimitiveType; import java.io.IOException; @@ -31,8 +31,9 @@ /** Byte {@link ColumnReader}. Using INT32 to store byte, so just cast int to byte. */ public class ByteColumnReader extends AbstractColumnReader { - public ByteColumnReader(ColumnDescriptor descriptor, PageReader pageReader) throws IOException { - super(descriptor, pageReader); + public ByteColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) + throws IOException { + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.INT32); } @@ -69,6 +70,38 @@ protected void readBatch(int rowId, int num, WritableByteVector column) { } } + @Override + protected void skipBatch(int num) { + int left = num; + while (left > 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + skipByte(n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + skipByte(1); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + + private void skipByte(int num) { + skipDataBuffer(4 * num); + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, WritableByteVector column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BytesColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BytesColumnReader.java index e83115c8a69f..6ee395e58568 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BytesColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BytesColumnReader.java @@ -22,7 +22,7 @@ import org.apache.paimon.data.columnar.writable.WritableIntVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.schema.PrimitiveType; import java.io.IOException; @@ -31,9 +31,9 @@ /** Bytes {@link ColumnReader}. A int length and bytes data. 
*/ public class BytesColumnReader extends AbstractColumnReader { - public BytesColumnReader(ColumnDescriptor descriptor, PageReader pageReader) + public BytesColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) throws IOException { - super(descriptor, pageReader); + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.BINARY); } @@ -70,6 +70,41 @@ protected void readBatch(int rowId, int num, WritableBytesVector column) { } } + @Override + protected void skipBatch(int num) { + int left = num; + while (left > 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + skipBinary(n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + skipBinary(1); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + + private void skipBinary(int num) { + for (int i = 0; i < num; i++) { + int len = readDataBuffer(4).getInt(); + skipDataBuffer(len); + } + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, WritableBytesVector column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/DoubleColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/DoubleColumnReader.java index d6d8aa2bbb22..2cffd406248e 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/DoubleColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/DoubleColumnReader.java @@ -22,7 +22,7 @@ import org.apache.paimon.data.columnar.writable.WritableIntVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.schema.PrimitiveType; import java.io.IOException; @@ -31,9 +31,9 @@ /** Double {@link ColumnReader}. 
*/ public class DoubleColumnReader extends AbstractColumnReader { - public DoubleColumnReader(ColumnDescriptor descriptor, PageReader pageReader) + public DoubleColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) throws IOException { - super(descriptor, pageReader); + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.DOUBLE); } @@ -70,6 +70,38 @@ protected void readBatch(int rowId, int num, WritableDoubleVector column) { } } + @Override + protected void skipBatch(int num) { + int left = num; + while (left > 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + skipDouble(n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + skipDouble(1); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + + private void skipDouble(int num) { + skipDataBuffer(8 * num); + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, WritableDoubleVector column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FixedLenBytesColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FixedLenBytesColumnReader.java index afce717a6719..25e1b466e465 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FixedLenBytesColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FixedLenBytesColumnReader.java @@ -25,7 +25,7 @@ import org.apache.paimon.format.parquet.ParquetSchemaConverter; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.PrimitiveType; @@ -39,8 +39,9 @@ public class FixedLenBytesColumnReader private final int precision; public FixedLenBytesColumnReader( - ColumnDescriptor descriptor, PageReader pageReader, int precision) throws IOException { - super(descriptor, pageReader); + ColumnDescriptor descriptor, PageReadStore pageReadStore, int precision) + throws IOException { + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY); this.precision = precision; } @@ -79,6 +80,35 @@ protected void readBatch(int rowId, int num, VECTOR column) { } } + @Override + protected void skipBatch(int num) { + int bytesLen = descriptor.getPrimitiveType().getTypeLength(); + if (ParquetSchemaConverter.is32BitDecimal(precision)) { + for (int i = 0; i < num; i++) { + if (runLenDecoder.readInteger() == maxDefLevel) { + skipDataBinary(bytesLen); + } + } + } else if (ParquetSchemaConverter.is64BitDecimal(precision)) { + + for (int i = 0; i < num; i++) { + if (runLenDecoder.readInteger() == maxDefLevel) { + skipDataBinary(bytesLen); + } + } + } else { + for (int i = 0; i < num; i++) { + if (runLenDecoder.readInteger() == maxDefLevel) { + skipDataBinary(bytesLen); + } + } + } + } + + private void skipDataBinary(int len) { + skipDataBuffer(len); + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, VECTOR column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FloatColumnReader.java 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FloatColumnReader.java index 1f4adfa4b9c8..e9eec13df5fc 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FloatColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/FloatColumnReader.java @@ -22,7 +22,7 @@ import org.apache.paimon.data.columnar.writable.WritableIntVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.schema.PrimitiveType; import java.io.IOException; @@ -31,9 +31,9 @@ /** Float {@link ColumnReader}. */ public class FloatColumnReader extends AbstractColumnReader { - public FloatColumnReader(ColumnDescriptor descriptor, PageReader pageReader) + public FloatColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) throws IOException { - super(descriptor, pageReader); + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.FLOAT); } @@ -70,6 +70,38 @@ protected void readBatch(int rowId, int num, WritableFloatVector column) { } } + @Override + protected void skipBatch(int num) { + int left = num; + while (left > 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + skipFloat(n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + skipFloat(1); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + + private void skipFloat(int num) { + skipDataBuffer(4 * num); + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, WritableFloatVector column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/IntColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/IntColumnReader.java index e38e916d187e..521ad998f6f1 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/IntColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/IntColumnReader.java @@ -21,7 +21,7 @@ import org.apache.paimon.data.columnar.writable.WritableIntVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.schema.PrimitiveType; import java.io.IOException; @@ -30,8 +30,9 @@ /** Int {@link ColumnReader}. 
*/ public class IntColumnReader extends AbstractColumnReader { - public IntColumnReader(ColumnDescriptor descriptor, PageReader pageReader) throws IOException { - super(descriptor, pageReader); + public IntColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) + throws IOException { + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.INT32); } @@ -68,6 +69,38 @@ protected void readBatch(int rowId, int num, WritableIntVector column) { } } + @Override + protected void skipBatch(int num) { + int left = num; + while (left > 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + skipInteger(n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + skipInteger(1); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + + private void skipInteger(int num) { + skipDataBuffer(4 * num); + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, WritableIntVector column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/LongColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/LongColumnReader.java index a8e04eae673a..c4af086a7026 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/LongColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/LongColumnReader.java @@ -22,7 +22,7 @@ import org.apache.paimon.data.columnar.writable.WritableLongVector; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.schema.PrimitiveType; import java.io.IOException; @@ -31,8 +31,9 @@ /** Long {@link ColumnReader}. 
*/ public class LongColumnReader extends AbstractColumnReader { - public LongColumnReader(ColumnDescriptor descriptor, PageReader pageReader) throws IOException { - super(descriptor, pageReader); + public LongColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore) + throws IOException { + super(descriptor, pageReadStore); checkTypeName(PrimitiveType.PrimitiveTypeName.INT64); } @@ -69,6 +70,38 @@ protected void readBatch(int rowId, int num, WritableLongVector column) { } } + @Override + protected void skipBatch(int num) { + int left = num; + while (left > 0) { + if (runLenDecoder.currentCount == 0) { + runLenDecoder.readNextGroup(); + } + int n = Math.min(left, runLenDecoder.currentCount); + switch (runLenDecoder.mode) { + case RLE: + if (runLenDecoder.currentValue == maxDefLevel) { + skipValue(n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++] + == maxDefLevel) { + skipValue(1); + } + } + break; + } + left -= n; + runLenDecoder.currentCount -= n; + } + } + + private void skipValue(int num) { + skipDataBuffer(num * 8); + } + @Override protected void readBatchFromDictionaryIds( int rowId, int num, WritableLongVector column, WritableIntVector dictionaryIds) { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedColumnReader.java index c89c77603dac..2dd673037cdd 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedColumnReader.java @@ -278,7 +278,7 @@ private Pair readPrimitive( reader = new NestedPrimitiveColumnReader( descriptor, - pages.getPageReader(descriptor), + pages, isUtcTimestamp, descriptor.getPrimitiveType(), field.getType(), diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java index 7ee33a0bb5cc..fb04203cded6 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java @@ -44,6 +44,7 @@ import org.apache.parquet.column.page.DataPageV1; import org.apache.parquet.column.page.DataPageV2; import org.apache.parquet.column.page.DictionaryPage; +import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.column.page.PageReader; import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; @@ -82,15 +83,6 @@ public class NestedPrimitiveColumnReader implements ColumnReader valueList = new ArrayList<>(); + int valueIndex = collectDataFromParquetPage(readNumber, valueList); + + return fillColumnVector(valueIndex, valueList); + } + + private int collectDataFromParquetPage(int total, List valueList) throws IOException { + int valueIndex = 0; // Repeated types need two loops to read data. 
- while (!eof && index < readNumber) { + + readState.resetForNewBatch(total); + + while (!eof && readState.rowsToReadInBatch > 0) { + + if (readState.isFinished()) { // finished reading + eof = true; + break; + } + + long pageRowId = readState.rowId; + long rangeStart = readState.currentRangeStart(); + long rangeEnd = readState.currentRangeEnd(); + + if (pageRowId > rangeEnd) { + readState.nextRange(); + continue; + } + + boolean needFilterSkip = pageRowId < rangeStart; + do { - if (!lastValue.shouldSkip) { + + if (!lastValue.shouldSkip && !needFilterSkip) { valueList.add(lastValue.value); valueIndex++; } } while (readValue() && (repetitionLevel != 0)); - index++; + + if (pageRowId == readState.rowId) { + readState.rowId = readState.rowId + 1; + } + + if (!needFilterSkip) { + readState.rowsToReadInBatch = readState.rowsToReadInBatch - 1; + } } - return fillColumnVector(valueIndex, valueList); + return valueIndex; } public LevelDelegation getLevelDelegation() { @@ -255,20 +287,24 @@ private void readAndSaveRepetitionAndDefinitionLevels() { // get the values of repetition and definitionLevel repetitionLevel = repetitionLevelColumn.nextInt(); definitionLevel = definitionLevelColumn.nextInt(); - valuesRead++; + readState.valuesToReadInPage = readState.valuesToReadInPage - 1; repetitionLevelList.add(repetitionLevel); definitionLevelList.add(definitionLevel); } private int readPageIfNeed() throws IOException { // Compute the number of values we want to read in this page. - int leftInPage = (int) (endOfPageValueCount - valuesRead); - if (leftInPage == 0) { - // no data left in current page, load data from new page - readPage(); - leftInPage = (int) (endOfPageValueCount - valuesRead); + if (readState.valuesToReadInPage == 0) { + int pageValueCount = readPage(); + // pageValueCount is the number of values in the current page + if (pageValueCount < 0) { + // we've read all the pages; this could happen when we're reading a repeated list + // and we + // don't know where the list will end until we've seen all the pages. 
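The do/while in collectDataFromParquetPage leans on a Parquet invariant: a repetition level of 0 marks the first value of a new top-level row, so one row of a repeated column can span many leaf values. A self-contained illustration of that row/value distinction (a standalone helper, not project code):

    /** Counts top-level rows of a repeated column from its repetition levels. */
    static int countRows(int[] repetitionLevels) {
        int rows = 0;
        for (int level : repetitionLevels) {
            if (level == 0) {
                rows++; // repetition level 0 starts a new record
            }
        }
        return rows;
    }

For example, an ARRAY<INT> column holding the rows [1,2,3], [4], [5,6] stores six leaf values with repetition levels {0,1,1,0,0,1}, and countRows returns 3.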
+ return -1; + } } - return leftInPage; + return readState.valuesToReadInPage; } private Object readPrimitiveTypedRow(DataType category) { @@ -528,33 +564,36 @@ private static HeapBytesVector getHeapBytesVector(int total, List valueList) { return phbv; } - protected void readPage() { + protected int readPage() { DataPage page = pageReader.readPage(); if (page == null) { - return; + return -1; } - page.accept( - new DataPage.Visitor() { - @Override - public Void visit(DataPageV1 dataPageV1) { - readPageV1(dataPageV1); - return null; - } + long pageFirstRowIndex = page.getFirstRowIndex().orElse(0L); - @Override - public Void visit(DataPageV2 dataPageV2) { - readPageV2(dataPageV2); - return null; - } - }); + int pageValueCount = + page.accept( + new DataPage.Visitor() { + @Override + public Integer visit(DataPageV1 dataPageV1) { + return readPageV1(dataPageV1); + } + + @Override + public Integer visit(DataPageV2 dataPageV2) { + return readPageV2(dataPageV2); + } + }); + readState.resetForNewPage(pageValueCount, pageFirstRowIndex); + return pageValueCount; } private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) throws IOException { - this.pageValueCount = valueCount; - this.endOfPageValueCount = valuesRead + pageValueCount; + // this.pageValueCount = valueCount; + // this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { @@ -577,13 +616,14 @@ private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int } try { - dataColumn.initFromPage(pageValueCount, in); + dataColumn.initFromPage(valueCount, in); } catch (IOException e) { throw new IOException(String.format("Could not read page in col %s.", descriptor), e); } } - private void readPageV1(DataPageV1 page) { + private int readPageV1(DataPageV1 page) { + int pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL); this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); @@ -597,15 +637,16 @@ private void readPageV1(DataPageV1 page) { LOG.debug("Reading definition levels at {}.", in.position()); dlReader.initFromPage(pageValueCount, in); LOG.debug("Reading data at {}.", in.position()); - initDataReader(page.getValueEncoding(), in, page.getValueCount()); + initDataReader(page.getValueEncoding(), in, pageValueCount); + return pageValueCount; } catch (IOException e) { throw new ParquetDecodingException( String.format("Could not read page %s in col %s.", page, descriptor), e); } } - private void readPageV2(DataPageV2 page) { - this.pageValueCount = page.getValueCount(); + private int readPageV2(DataPageV2 page) { + int pageValueCount = page.getValueCount(); this.repetitionLevelColumn = newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels()); this.definitionLevelColumn = @@ -615,8 +656,8 @@ private void readPageV2(DataPageV2 page) { "Page data size {} bytes and {} records.", page.getData().size(), pageValueCount); - initDataReader( - page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount()); + initDataReader(page.getDataEncoding(), page.getData().toInputStream(), pageValueCount); + return pageValueCount; } catch (IOException e) { throw new ParquetDecodingException( String.format("Could not read page %s in col %s.", page, descriptor), e); diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetReadState.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetReadState.java new file mode 100644 index 000000000000..a6003676825a --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetReadState.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.parquet.reader; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.PrimitiveIterator; + +/** Parquet reader state for column index. */ +public class ParquetReadState { + /** A special row range used when there is no row indexes (hence all rows must be included). */ + private static final RowRange MAX_ROW_RANGE = new RowRange(Long.MIN_VALUE, Long.MAX_VALUE); + + /** + * A special row range used when the row indexes are present AND all the row ranges have been + * processed. This serves as a sentinel at the end indicating that all rows come after the last + * row range should be skipped. + */ + private static final RowRange END_ROW_RANGE = new RowRange(Long.MAX_VALUE, Long.MIN_VALUE); + + private final Iterator rowRanges; + + private RowRange currentRange; + + /** row index for the next read. */ + long rowId; + + int valuesToReadInPage; + int rowsToReadInBatch; + + public ParquetReadState(PrimitiveIterator.OfLong rowIndexes) { + this.rowRanges = constructRanges(rowIndexes); + nextRange(); + } + + /** + * Construct a list of row ranges from the given `rowIndexes`. For example, suppose the + * `rowIndexes` are `[0, 1, 2, 4, 5, 7, 8, 9]`, it will be converted into 3 row ranges: `[0-2], + * [4-5], [7-9]`. + */ + private Iterator constructRanges(PrimitiveIterator.OfLong rowIndexes) { + if (rowIndexes == null) { + return null; + } + + List rowRanges = new ArrayList<>(); + long currentStart = Long.MIN_VALUE; + long previous = Long.MIN_VALUE; + + while (rowIndexes.hasNext()) { + long idx = rowIndexes.nextLong(); + if (currentStart == Long.MIN_VALUE) { + currentStart = idx; + } else if (previous + 1 != idx) { + RowRange range = new RowRange(currentStart, previous); + rowRanges.add(range); + currentStart = idx; + } + previous = idx; + } + + if (previous != Long.MIN_VALUE) { + rowRanges.add(new RowRange(currentStart, previous)); + } + + return rowRanges.iterator(); + } + + /** Must be called at the beginning of reading a new batch. */ + void resetForNewBatch(int batchSize) { + this.rowsToReadInBatch = batchSize; + } + + /** Must be called at the beginning of reading a new page. 
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java
index 860ec54fa88b..a2be77414d5a 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java
@@ -87,58 +87,45 @@ public static ColumnReader createColumnReader(
         getAllColumnDescriptorByType(depth, type, columnDescriptors);
         switch (fieldType.getTypeRoot()) {
             case BOOLEAN:
-                return new BooleanColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new BooleanColumnReader(descriptors.get(0), pages);
             case TINYINT:
-                return new ByteColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new ByteColumnReader(descriptors.get(0), pages);
             case DOUBLE:
-                return new DoubleColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new DoubleColumnReader(descriptors.get(0), pages);
             case FLOAT:
-                return new FloatColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new FloatColumnReader(descriptors.get(0), pages);
             case INTEGER:
             case DATE:
             case TIME_WITHOUT_TIME_ZONE:
-                return new IntColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new IntColumnReader(descriptors.get(0), pages);
             case BIGINT:
-                return new LongColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new LongColumnReader(descriptors.get(0), pages);
             case SMALLINT:
-                return new ShortColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new ShortColumnReader(descriptors.get(0), pages);
             case CHAR:
             case VARCHAR:
             case BINARY:
             case VARBINARY:
-                return new BytesColumnReader(
-                        descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new BytesColumnReader(descriptors.get(0), pages);
             case TIMESTAMP_WITHOUT_TIME_ZONE:
             case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                 if (descriptors.get(0).getPrimitiveType().getPrimitiveTypeName()
                        == PrimitiveType.PrimitiveTypeName.INT64) {
-                    return new LongColumnReader(
-                            descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                    return new LongColumnReader(descriptors.get(0), pages);
                 }
-                return new TimestampColumnReader(
-                        true, descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                return new TimestampColumnReader(true, descriptors.get(0), pages);
             case DECIMAL:
                 switch (descriptors.get(0).getPrimitiveType().getPrimitiveTypeName()) {
                     case INT32:
-                        return new IntColumnReader(
-                                descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                        return new IntColumnReader(descriptors.get(0), pages);
                     case INT64:
-                        return new LongColumnReader(
-                                descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                        return new LongColumnReader(descriptors.get(0), pages);
                     case BINARY:
-                        return new BytesColumnReader(
-                                descriptors.get(0), pages.getPageReader(descriptors.get(0)));
+                        return new BytesColumnReader(descriptors.get(0), pages);
                     case FIXED_LEN_BYTE_ARRAY:
                         return new FixedLenBytesColumnReader(
                                 descriptors.get(0),
-                                pages.getPageReader(descriptors.get(0)),
+                                pages,
                                 ((DecimalType) fieldType).getPrecision());
                 }
             case ARRAY:
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/RunLengthDecoder.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/RunLengthDecoder.java
index 2dd1655d571f..ebb8f28fa1ee 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/RunLengthDecoder.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/RunLengthDecoder.java
@@ -194,6 +194,53 @@ private void readDictionaryIdData(int total, WritableIntVector c, int rowId) {
         }
     }
 
+    /** Skip the dictionary ids of positions whose definition level equals {@code level}. */
+    void skipDictionaryIds(int total, int level, RunLengthDecoder data) {
+        int left = total;
+        while (left > 0) {
+            if (this.currentCount == 0) {
+                this.readNextGroup();
+            }
+            int n = Math.min(left, this.currentCount);
+            switch (mode) {
+                case RLE:
+                    if (currentValue == level) {
+                        data.skipDictionaryIdData(n);
+                    }
+                    break;
+                case PACKED:
+                    for (int i = 0; i < n; ++i) {
+                        if (currentBuffer[currentBufferIdx++] == level) {
+                            data.readInteger();
+                        }
+                    }
+                    break;
+            }
+            left -= n;
+            currentCount -= n;
+        }
+    }
+
+    /** Skip {@code total} dictionary ids in this decoder's own stream. */
+    private void skipDictionaryIdData(int total) {
+        int left = total;
+        while (left > 0) {
+            if (this.currentCount == 0) {
+                this.readNextGroup();
+            }
+            int n = Math.min(left, this.currentCount);
+            switch (mode) {
+                case RLE:
+                    break;
+                case PACKED:
+                    currentBufferIdx += n;
+                    break;
+            }
+            left -= n;
+            currentCount -= n;
+        }
+    }
+
     /** Reads the next varint encoded int. */
     private int readUnsignedVarInt() throws IOException {
         int value = 0;
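
The two skip methods follow Parquet's RLE/bit-packed hybrid layout for definition levels: an RLE run carries one level for the whole run, so it can be handled wholesale, while a bit-packed group must be inspected entry by entry. Either way, only positions whose level equals the max definition level actually have a value stored in the data stream. A toy model of that counting (hypothetical inputs; the real byte layout is not modeled):

    public class HybridSkipDemo {
        public static void main(String[] args) {
            final int maxDefLevel = 1;
            int valuesToSkip = 0;

            // An RLE run: 3 positions, all level 0 (nulls) -> nothing stored.
            valuesToSkip += rleRun(3, 0, maxDefLevel);
            // An RLE run: 5 positions, all level 1 -> 5 stored values.
            valuesToSkip += rleRun(5, 1, maxDefLevel);
            // A bit-packed group: levels vary per position -> inspect each one.
            valuesToSkip += packedRun(new int[] {1, 0, 1, 1, 0}, maxDefLevel);

            System.out.println("skip " + valuesToSkip + " stored values"); // prints 8
        }

        // RLE: one level for the whole run, handled without a loop.
        static int rleRun(int length, int level, int maxDefLevel) {
            return level == maxDefLevel ? length : 0;
        }

        // PACKED: each position carries its own level.
        static int packedRun(int[] levels, int maxDefLevel) {
            int n = 0;
            for (int level : levels) {
                if (level == maxDefLevel) {
                    n++;
                }
            }
            return n;
        }
    }
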
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ShortColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ShortColumnReader.java
index 7b32232261a7..bdb2f401fa3f 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ShortColumnReader.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ShortColumnReader.java
@@ -22,7 +22,7 @@
 import org.apache.paimon.data.columnar.writable.WritableShortVector;
 
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.page.PageReadStore;
 import org.apache.parquet.schema.PrimitiveType;
 
 import java.io.IOException;
@@ -30,9 +30,9 @@
 /** Short {@link ColumnReader}. Using INT32 to store short, so just cast int to short. */
 public class ShortColumnReader extends AbstractColumnReader<WritableShortVector> {
 
-    public ShortColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
+    public ShortColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadStore)
             throws IOException {
-        super(descriptor, pageReader);
+        super(descriptor, pageReadStore);
         checkTypeName(PrimitiveType.PrimitiveTypeName.INT32);
     }
 
@@ -71,6 +71,39 @@ protected void readBatch(int rowId, int num, WritableShortVector column) {
         }
     }
 
+    @Override
+    protected void skipBatch(int num) {
+        int left = num;
+        while (left > 0) {
+            if (runLenDecoder.currentCount == 0) {
+                runLenDecoder.readNextGroup();
+            }
+            int n = Math.min(left, runLenDecoder.currentCount);
+            switch (runLenDecoder.mode) {
+                case RLE:
+                    if (runLenDecoder.currentValue == maxDefLevel) {
+                        skipShort(n);
+                    }
+                    break;
+                case PACKED:
+                    for (int i = 0; i < n; ++i) {
+                        if (runLenDecoder.currentBuffer[runLenDecoder.currentBufferIdx++]
+                                == maxDefLevel) {
+                            skipShort(1);
+                        }
+                    }
+                    break;
+            }
+            left -= n;
+            runLenDecoder.currentCount -= n;
+        }
+    }
+
+    /** Shorts are stored as INT32, so each skipped value occupies 4 bytes in the data buffer. */
+    private void skipShort(int num) {
+        skipDataBuffer(4 * num);
+    }
+
     @Override
     protected void readBatchFromDictionaryIds(
             int rowId, int num, WritableShortVector column, WritableIntVector dictionaryIds) {
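
This constructor migration from PageReader to PageReadStore, repeated for each reader below, is what gives every column reader access to the row indexes that filtering produces. A sketch of the wiring, assuming it sits next to the ParquetReadState class added above and that parquet-mr's PageReadStore API (getPageReader, getRowIndexes) is available; Paimon's actual AbstractColumnReader fields may differ:

    import org.apache.parquet.column.ColumnDescriptor;
    import org.apache.parquet.column.page.PageReadStore;
    import org.apache.parquet.column.page.PageReader;

    import java.util.PrimitiveIterator;

    /** Simplified stand-in for the constructor wiring of AbstractColumnReader. */
    abstract class ColumnReaderSketch {
        protected final PageReader pageReader;
        protected final ParquetReadState readState;

        ColumnReaderSketch(ColumnDescriptor descriptor, PageReadStore pageReadStore) {
            // The store hands out the per-column page stream...
            this.pageReader = pageReadStore.getPageReader(descriptor);
            // ...and, when a filter produced them, the surviving row indexes.
            PrimitiveIterator.OfLong rowIndexes = pageReadStore.getRowIndexes().orElse(null);
            this.readState = new ParquetReadState(rowIndexes);
        }
    }
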
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/TimestampColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/TimestampColumnReader.java
index 4a279ff90e15..8767173315c2 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/TimestampColumnReader.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/TimestampColumnReader.java
@@ -23,7 +23,7 @@
 import org.apache.paimon.data.columnar.writable.WritableTimestampVector;
 
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.page.PageReadStore;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.PrimitiveType;
 
@@ -49,9 +49,9 @@ public class TimestampColumnReader extends AbstractColumnReader
Date: Tue, 3 Dec 2024 13:33:44 +0800
Subject: [PATCH 29/29] [core] add tests for bitmap file index and
 remove-record-on-sequence-group

---
 .../table/PrimaryKeyFileStoreTableTest.java   | 171 ++++++++++++++++++
 1 file changed, 171 insertions(+)

diff --git a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
index e65db5ce015e..a2c3020273af 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
@@ -100,6 +100,7 @@
 import static org.apache.paimon.CoreOptions.ChangelogProducer.LOOKUP;
 import static org.apache.paimon.CoreOptions.DELETION_VECTORS_ENABLED;
 import static org.apache.paimon.CoreOptions.FILE_FORMAT;
+import static org.apache.paimon.CoreOptions.FILE_INDEX_IN_MANIFEST_THRESHOLD;
 import static org.apache.paimon.CoreOptions.LOOKUP_LOCAL_FILE_TYPE;
 import static org.apache.paimon.CoreOptions.MERGE_ENGINE;
 import static org.apache.paimon.CoreOptions.MergeEngine;
@@ -925,6 +926,92 @@ public void testDeletionVectorsWithFileIndexInMeta() throws Exception {
         assertThat(((DataSplit) splits.get(0)).dataFiles().size()).isEqualTo(1);
     }
 
+    @Test
+    public void testDeletionVectorsWithBitmapFileIndexInFile() throws Exception {
+        FileStoreTable table =
+                createFileStoreTable(
+                        conf -> {
+                            conf.set(BUCKET, 1);
+                            conf.set(DELETION_VECTORS_ENABLED, true);
+                            conf.set(TARGET_FILE_SIZE, MemorySize.ofBytes(1));
+                            conf.set(FILE_INDEX_IN_MANIFEST_THRESHOLD, MemorySize.ofBytes(1));
+                            conf.set("file-index.bitmap.columns", "b");
+                        });
+
+        StreamTableWrite write =
+                table.newWrite(commitUser).withIOManager(new IOManagerImpl(tempDir.toString()));
+        StreamTableCommit commit = table.newCommit(commitUser);
+
+        write.write(rowData(1, 1, 300L));
+        write.write(rowData(1, 2, 400L));
+        write.write(rowData(1, 3, 100L));
+        write.write(rowData(1, 4, 100L));
+        commit.commit(0, write.prepareCommit(true, 0));
+
+        write.write(rowData(1, 1, 100L));
+        write.write(rowData(1, 2, 100L));
+        write.write(rowData(1, 3, 300L));
+        write.write(rowData(1, 5, 100L));
+        commit.commit(1, write.prepareCommit(true, 1));
+
+        write.write(rowData(1, 4, 200L));
+        commit.commit(2, write.prepareCommit(true, 2));
+
+        PredicateBuilder builder = new PredicateBuilder(ROW_TYPE);
+        List<Split> splits = toSplits(table.newSnapshotReader().read().dataSplits());
+        assertThat(((DataSplit) splits.get(0)).dataFiles().size()).isEqualTo(2);
+        TableRead read = table.newRead().withFilter(builder.equal(2, 100L));
+        assertThat(getResult(read, splits, BATCH_ROW_TO_STRING))
+                .hasSameElementsAs(
+                        Arrays.asList(
+                                "1|1|100|binary|varbinary|mapKey:mapVal|multiset",
+                                "1|2|100|binary|varbinary|mapKey:mapVal|multiset",
+                                "1|5|100|binary|varbinary|mapKey:mapVal|multiset"));
+    }
+
+    @Test
+    public void testDeletionVectorsWithBitmapFileIndexInMeta() throws Exception {
+        FileStoreTable table =
+                createFileStoreTable(
+                        conf -> {
+                            conf.set(BUCKET, 1);
+                            conf.set(DELETION_VECTORS_ENABLED, true);
+                            conf.set(TARGET_FILE_SIZE, MemorySize.ofBytes(1));
+                            conf.set(FILE_INDEX_IN_MANIFEST_THRESHOLD, MemorySize.ofMebiBytes(1));
+                            conf.set("file-index.bitmap.columns", "b");
+                        });
+
+        StreamTableWrite write =
+                table.newWrite(commitUser).withIOManager(new IOManagerImpl(tempDir.toString()));
+        StreamTableCommit commit = table.newCommit(commitUser);
+
+        write.write(rowData(1, 1, 300L));
+        write.write(rowData(1, 2, 400L));
+        write.write(rowData(1, 3, 100L));
+        write.write(rowData(1, 4, 100L));
+        commit.commit(0, write.prepareCommit(true, 0));
+
+        write.write(rowData(1, 1, 100L));
+        write.write(rowData(1, 2, 100L));
+        write.write(rowData(1, 3, 300L));
+        write.write(rowData(1, 5, 100L));
+        commit.commit(1, write.prepareCommit(true, 1));
+
+        write.write(rowData(1, 4, 200L));
+        commit.commit(2, write.prepareCommit(true, 2));
+
+        PredicateBuilder builder = new PredicateBuilder(ROW_TYPE);
+        List<Split> splits = toSplits(table.newSnapshotReader().read().dataSplits());
+        assertThat(((DataSplit) splits.get(0)).dataFiles().size()).isEqualTo(2);
+        TableRead read = table.newRead().withFilter(builder.equal(2, 100L));
+        assertThat(getResult(read, splits, BATCH_ROW_TO_STRING))
+                .hasSameElementsAs(
+                        Arrays.asList(
+                                "1|1|100|binary|varbinary|mapKey:mapVal|multiset",
+                                "1|2|100|binary|varbinary|mapKey:mapVal|multiset",
+                                "1|5|100|binary|varbinary|mapKey:mapVal|multiset"));
+    }
+
     @Test
     public void testWithShardFirstRow() throws Exception {
         FileStoreTable table =
@@ -1189,6 +1276,90 @@ public void testPartialUpdateRemoveRecordOnDelete() throws Exception {
         commit.close();
     }
 
+    @Test
+    public void testPartialUpdateRemoveRecordOnSequenceGroup() throws Exception {
+        RowType rowType =
+                RowType.of(
+                        new DataType[] {
+                            DataTypes.INT(),
+                            DataTypes.INT(),
+                            DataTypes.INT(),
+                            DataTypes.INT(),
+                            DataTypes.INT(),
+                            DataTypes.INT(),
+                            DataTypes.INT()
+                        },
+                        new String[] {"pt", "a", "b", "seq1", "c", "d", "seq2"});
+        FileStoreTable table =
+                createFileStoreTable(
+                        options -> {
+                            options.set("merge-engine", "partial-update");
+                            options.set("fields.seq1.sequence-group", "b");
+                            options.set("fields.seq2.sequence-group", "c,d");
+                            options.set("partial-update.remove-record-on-sequence-group", "seq2");
+                        },
+                        rowType);
+        FileStoreTable wrongTable =
+                createFileStoreTable(
+                        options -> {
+                            options.set("merge-engine", "partial-update");
+                            options.set("fields.seq1.sequence-group", "b");
+                            options.set("fields.seq2.sequence-group", "c,d");
+                            options.set("partial-update.remove-record-on-sequence-group", "b");
+                        },
+                        rowType);
+        Function<InternalRow, String> rowToString = row -> internalRowToString(row, rowType);
+
+        assertThatThrownBy(() -> wrongTable.newWrite(""))
+                .hasMessageContaining(
+                        "field 'b' defined in 'partial-update.remove-record-on-sequence-group' option must be part of sequence groups");
+
+        SnapshotReader snapshotReader = table.newSnapshotReader();
+        TableRead read = table.newRead();
+        StreamTableWrite write = table.newWrite("");
+        StreamTableCommit commit = table.newCommit("");
+        // 1. Inserts
+        write.write(GenericRow.of(1, 1, 10, 1, 20, 20, 1));
+        write.write(GenericRow.of(1, 1, 11, 2, 25, 25, 0));
+        write.write(GenericRow.of(1, 1, 12, 1, 29, 29, 2));
+        commit.commit(0, write.prepareCommit(true, 0));
+        List<String> result =
+                getResult(read, toSplits(snapshotReader.read().dataSplits()), rowToString);
+        assertThat(result).containsExactlyInAnyOrder("+I[1, 1, 11, 2, 29, 29, 2]");
+
+        // 2. Update Before
+        write.write(GenericRow.ofKind(RowKind.UPDATE_BEFORE, 1, 1, 11, 2, 29, 29, 2));
+        commit.commit(1, write.prepareCommit(true, 1));
+        result = getResult(read, toSplits(snapshotReader.read().dataSplits()), rowToString);
+        assertThat(result).containsExactlyInAnyOrder("+I[1, 1, NULL, 2, NULL, NULL, 2]");
+
+        // 3. Update After
+        write.write(GenericRow.ofKind(RowKind.UPDATE_AFTER, 1, 1, 11, 2, 30, 30, 3));
+        commit.commit(2, write.prepareCommit(true, 2));
+        result = getResult(read, toSplits(snapshotReader.read().dataSplits()), rowToString);
+        assertThat(result).containsExactlyInAnyOrder("+I[1, 1, 11, 2, 30, 30, 3]");
+
+        // 4. Retracts
+        write.write(GenericRow.ofKind(RowKind.DELETE, 1, 1, 12, 3, 30, 30, 2));
+        commit.commit(3, write.prepareCommit(true, 3));
+        result = getResult(read, toSplits(snapshotReader.read().dataSplits()), rowToString);
+        assertThat(result).containsExactlyInAnyOrder("+I[1, 1, NULL, 3, 30, 30, 3]");
+
+        write.write(GenericRow.ofKind(RowKind.DELETE, 1, 1, 12, 2, 30, 31, 5));
+        commit.commit(4, write.prepareCommit(true, 4));
+        result = getResult(read, toSplits(snapshotReader.read().dataSplits()), rowToString);
+        assertThat(result).isEmpty();
+
+        // 5. Inserts
+        write.write(GenericRow.of(1, 1, 11, 2, 30, 31, 6));
+        commit.commit(5, write.prepareCommit(true, 5));
+        result = getResult(read, toSplits(snapshotReader.read().dataSplits()), rowToString);
+        assertThat(result).containsExactlyInAnyOrder("+I[1, 1, 11, 2, 30, 31, 6]");
+
+        write.close();
+        commit.close();
+    }
+
     @Test
     public void testPartialUpdateWithAgg() throws Exception {
         RowType rowType =