Skip to content

Commit

Permalink
add regression test
Browse files Browse the repository at this point in the history
  • Loading branch information
jordanrfrazier committed Sep 27, 2023
1 parent 73b93c9 commit 2bab70f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{"_time":"2022-01-04T14:38:31.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-04T14:38:31.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":499.48,"item":"0da9b3fd-2c92-4b87-92b0-5137eaf6ff75"}
{"_time":"2022-01-05T20:40:03.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-05T20:40:03.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":498.16,"item":"f9cdde05-40f9-48fd-812e-1c3936589184"}
{"_time":"2022-01-06T04:54:59.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-06T04:54:59.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":491.38,"item":"64cd0de2-02b8-4420-8b8a-57ad4d0b9aa2"}
{"_time":"2022-01-06T07:14:07.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-06T07:14:07.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":498.32,"item":"3940b205-50a6-4141-ab68-aa0464ae0f3d"}
{"_time":"2022-01-07T02:32:48.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-07T02:32:48.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":498.4,"item":"3c1f2b17-8bb0-43b9-8a52-e3d8d81fe311"}
{"_time":"2022-01-08T08:17:28.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-08T08:17:28.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":492.91,"item":"b5a8d6e1-9070-410d-bf44-72754b485faa"}
{"_time":"2022-01-09T15:12:23.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-09T15:12:23.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":493.92,"item":"9976f04f-3faf-46bd-80f6-1dc102632ec6"}
{"_time":"2022-01-10T02:11:28.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-10T02:11:28.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":495.04,"item":"ca02d3d3-a309-4b7b-ac12-29fa4a1a8704"}
{"_time":"2022-01-14T15:06:56.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-14T15:06:56.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":492.81,"item":"3940b205-50a6-4141-ab68-aa0464ae0f3d"}
{"_time":"2022-01-16T05:08:53.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-16T05:08:53.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":492.92,"item":"5a86942a-5bcc-41f7-9286-937b248caccc"}
{"_time":"2022-01-20T03:28:47.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-20T03:28:47.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":495.65,"item":"bcfd7a57-f36e-4b37-9b2d-795401f36459"}
{"_time":"2022-01-21T13:25:25.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-21T13:25:25.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":499.51,"item":"d6789f76-7ac6-415b-a2fa-8b56f80eef74"}
{"_time":"2022-01-23T06:10:21.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-23T06:10:21.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":491.33,"item":"d988eedb-2f3c-4ad5-82ab-7b1c25754ea0"}
{"_time":"2022-01-24T16:50:58.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-24T16:50:58.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":494.25,"item":"69718e27-44e6-4cb1-86ff-fc5b5d4c50a1"}
{"_time":"2022-01-26T20:56:58.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-26T20:56:58.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":492.23,"item":"87c91aeb-dba3-431e-bbda-f65f9164c64d"}
{"_time":"2022-01-26T22:57:18.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-26T22:57:18.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":491.84,"item":"804488a1-9724-465d-a596-1b6930510640"}
{"_time":"2022-01-29T08:46:35.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-29T08:46:35.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":490.18,"item":"bc323957-93e4-4aa8-8fc1-c73411e9ca0b"}
{"_time":"2022-01-29T17:21:29.000000000","_key":"5fec83d4-f5c6-4943-ab05-2b6760330daf","time":"2022-01-29T17:21:29.000000000","user":"5fec83d4-f5c6-4943-ab05-2b6760330daf","amount":491.56,"item":"ad811380-ac9c-4f6a-9015-ba2441abbff0"}
23 changes: 23 additions & 0 deletions python/pytests/parquet_source_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,26 @@ async def test_read_parquet_with_subsort(golden) -> None:

await source.add_file("../testdata/purchases/purchases_part2.parquet")
golden.jsonl(source)


# Regression test: the output and progress channels must be drained correctly.
#
# A parquet file holding more rows than (CHANNEL_SIZE / MAX_BATCH_SIZE)
# used to fill the channels up, which left the sender blocked. This test
# reads such a file end to end to confirm the channels are drained and
# the sender is able to continue.
# See https://github.com/kaskada-ai/kaskada/issues/775
async def test_large_parquet_file(golden) -> None:
    purchases = await kd.sources.Parquet.create(
        "../testdata/parquet/purchases_100k.parquet",
        time_column="time",
        key_column="user",
    )
    user_col, amount_col = purchases.col("user"), purchases.col("amount")

    # The filter keeps the golden output small; the whole file must still
    # be processed to evaluate it.
    is_target_user = user_col.eq("5fec83d4-f5c6-4943-ab05-2b6760330daf")
    is_large_amount = amount_col.gt(490)
    filtered = purchases.filter(is_target_user.and_(is_large_amount))
    golden.jsonl(filtered)

Binary file added testdata/parquet/purchases_100k.parquet
Binary file not shown.

0 comments on commit 2bab70f

Please sign in to comment.