Skip to content

Commit

Permalink
add tests for pushdown all types
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Jun 15, 2024
1 parent 1563715 commit 7291aa5
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 0 deletions.
5 changes: 5 additions & 0 deletions scripts/generate_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate
query = "CREATE table test_table AS SELECT {'i':i, 'j':i+1} as value, i%2 as part from range(0,10) tbl(i);"
generate_test_data_delta_rs("simple_partitioned_with_structs", query, "part");

## Partitioned table with all types we can file skip on
# One table is generated per type under test_file_skipping/<type>. Each table
# has two rows (i = 0 and i = 1) where both `value` and `part` hold i cast to
# that type; "part" is forwarded as the third argument of
# generate_test_data_delta_rs (presumably the partition column -- confirm
# against that helper).
# The loop variable is named `type_name` so the builtin `type` is not shadowed
# for the rest of the script.
for type_name in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]:
    query = f"CREATE table test_table as select i::{type_name} as value, i::{type_name} as part from range(0,2) tbl(i)"
    generate_test_data_delta_rs(f"test_file_skipping/{type_name}", query, "part")

## Simple table with deletion vector
con = duckdb.connect()
con.query(f"COPY (SELECT i as id, ('val' || i::VARCHAR) as value FROM range(0,1000000) tbl(i))TO '{TMP_PATH}/simple_sf1_with_dv.parquet'")
Expand Down
44 changes: 44 additions & 0 deletions test/sql/generated/file_skipping_all_types.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# name: test/sql/generated/file_skipping_all_types.test
# description: Test filter pushdown succeeds on all file types we can push down
# group: [delta_generated]

require parquet

require delta

# The tables under ./data/generated/test_file_skipping/ are not checked in; this
# test only runs when the environment signals that generated data is present.
# Per scripts/generate_test_data.py, each <type> table holds two rows (i = 0 and
# i = 1) with `value` and `part` both set to i cast to <type>.
require-env GENERATED_DATA_AVAILABLE

# TODO: this doesn't appear to skip files yet
# TODO: add tests once https://github.com/duckdb/duckdb/pull/12488 is available

# bool table filtered with a boolean literal: only the row whose part is true
# should be returned.
query I
select value
from delta_scan('./data/generated/test_file_skipping/bool/delta_lake')
where part != false
order by value
----
true

# Same inequality filter, but written with an integer literal and repeated for
# every integer-like type plus bool and varchar. Of the two rows, only the one
# generated from i = 1 should be returned.
# NOTE(review): for bool and varchar this relies on `part != 0` being accepted
# (presumably through an implicit cast), and for bool on the runner treating the
# expected `1` as equal to true -- confirm.
foreach type bool int tinyint smallint bigint varchar

query I
select value
from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
where part != 0
order by value
----
1

endloop

# Floating-point types use a range predicate instead of an inequality: of the
# two rows, only the one generated from i = 1 satisfies part > 0.5.
foreach type float double

query I
select value
from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
where part > 0.5
order by value
----
1.0

endloop

0 comments on commit 7291aa5

Please sign in to comment.