-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds the following aggregations: count_if, max, mean, min, stddev, and variance
- Loading branch information
1 parent
bf2ef0d
commit c755d60
Showing
29 changed files
with
599 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import pytest | ||
import kaskada as kd | ||
|
||
|
||
@pytest.fixture(scope="module")
def count_if_source() -> kd.sources.CsvString:
    """Build the module-scoped CSV source shared by the `count_if` tests.

    The data has a header plus eight rows over keys ``A`` and ``B``, with
    columns ``time``, ``key``, ``m``, ``n`` and ``is_valid``. Several cells
    are intentionally left empty (presumably read as nulls -- confirm
    against the CSV reader).
    """
    rows = (
        "time,key,m,n,is_valid",
        "1996-12-19T16:39:57,A,5,10,true",
        "1996-12-19T16:39:58,B,24,3,true",
        "1996-12-19T16:39:59,A,17,6,false",
        "1996-12-19T16:40:00,A,,9,false",
        "1996-12-19T16:40:01,A,12,,true",
        "1996-12-19T16:40:02,A,,,",
        "1996-12-19T16:40:03,B,26,12,true",
        "1996-12-19T16:40:04,B,30,1,true",
    )
    csv_text = "\n".join(rows)
    return kd.sources.CsvString(
        csv_text,
        time_column_name="time",
        key_column_name="key",
    )
|
||
|
||
def test_count_if_unwindowed(count_if_source, golden) -> None:
    """Compare `count_if()` over `is_valid` with no window against the golden JSONL."""
    valid_flag = count_if_source.col("is_valid")
    m_col = count_if_source.col("m")
    fields = {
        "is_valid": valid_flag,
        "count_if": valid_flag.count_if(),
        "m": m_col,
    }
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_count_if_windowed(count_if_source, golden) -> None:
    """Compare `count_if` over `is_valid` in a `Since(m > 25)` window against the golden JSONL."""
    valid_flag = count_if_source.col("is_valid")
    m_col = count_if_source.col("m")
    windowed_count = valid_flag.count_if(window=kd.windows.Since(m_col > 25))
    fields = {
        "is_valid": valid_flag,
        "count_if": windowed_count,
        "m": m_col,
    }
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_count_if_since_true(count_if_source, golden) -> None:
    """Compare `count_if` over `is_valid` in a `Since(True)` window against the golden JSONL."""
    valid_flag = count_if_source.col("is_valid")
    m_col = count_if_source.col("m")
    count_since_true = valid_flag.count_if(window=kd.windows.Since(True))
    fields = {
        "is_valid": valid_flag,
        "count_if": count_since_true,
        "m": m_col,
    }
    golden.jsonl(kd.record(fields))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import kaskada as kd | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
    """Build the module-scoped CSV source shared by the `max` tests.

    The data has a header plus six rows over keys ``A`` and ``B``, with
    columns ``time``, ``key``, ``m`` and ``n``. Some ``m``/``n`` cells are
    intentionally left empty (presumably read as nulls -- confirm against
    the CSV reader).
    """
    rows = (
        "time,key,m,n",
        "1996-12-19T16:39:57,A,5,10",
        "1996-12-19T16:39:58,B,24,3",
        "1996-12-19T16:39:59,A,17,6",
        "1996-12-19T16:40:00,A,,9",
        "1996-12-19T16:40:01,A,12,",
        "1996-12-19T16:40:02,A,,",
    )
    csv_text = "\n".join(rows)
    return kd.sources.CsvString(
        csv_text,
        time_column_name="time",
        key_column_name="key",
    )
|
||
|
||
def test_max_unwindowed(source, golden) -> None:
    """Compare `max()` of `m` and `n` with no window against the golden JSONL."""
    m_col = source.col("m")
    n_col = source.col("n")
    fields = {
        "m": m_col,
        "max_m": m_col.max(),
        "n": n_col,
        "max_n": n_col.max(),
    }
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_max_windowed(source, golden) -> None:
    """Compare windowed `max` results against the golden JSONL.

    `m` uses a `Since(m > 20)` window; `n` uses a `Sliding(2, m > 10)` window.
    """
    m_col = source.col("m")
    n_col = source.col("n")
    max_m = m_col.max(window=kd.windows.Since(m_col > 20))
    max_n = n_col.max(window=kd.windows.Sliding(2, m_col > 10))
    fields = {"m": m_col, "max_m": max_m, "n": n_col, "max_n": max_n}
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_max_since_true(source, golden) -> None:
    """Compare `max` of `m` in a `Since(True)` window against the golden JSONL."""
    # Expectation carried over from the original test: with `Since(True)` the
    # aggregate should simply equal the original value of `m`.
    m_raw = source.col("m")
    m_agg = source.col("m").max(window=kd.windows.Since(True))
    golden.jsonl(kd.record({"m": m_raw, "m_max": m_agg}))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import kaskada as kd | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
    """Build the module-scoped CSV source shared by the `mean` tests.

    The data has a header plus six rows over keys ``A`` and ``B``, with
    columns ``time``, ``key``, ``m`` and ``n``. Some ``m``/``n`` cells are
    intentionally left empty (presumably read as nulls -- confirm against
    the CSV reader).
    """
    rows = (
        "time,key,m,n",
        "1996-12-19T16:39:57,A,5,10",
        "1996-12-19T16:39:58,B,24,3",
        "1996-12-19T16:39:59,A,17,6",
        "1996-12-19T16:40:00,A,,9",
        "1996-12-19T16:40:01,A,12,",
        "1996-12-19T16:40:02,A,,",
    )
    csv_text = "\n".join(rows)
    return kd.sources.CsvString(
        csv_text,
        time_column_name="time",
        key_column_name="key",
    )
|
||
|
||
def test_mean_unwindowed(source, golden) -> None:
    """Compare `mean()` of `m` and `n` with no window against the golden JSONL."""
    m_col = source.col("m")
    n_col = source.col("n")
    fields = {
        "m": m_col,
        "mean_m": m_col.mean(),
        "n": n_col,
        "mean_n": n_col.mean(),
    }
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_mean_windowed(source, golden) -> None:
    """Compare windowed `mean` results against the golden JSONL.

    `m` uses a `Since(m > 20)` window; `n` uses a `Sliding(2, m > 10)` window.
    """
    m_col = source.col("m")
    n_col = source.col("n")
    mean_m = m_col.mean(window=kd.windows.Since(m_col > 20))
    mean_n = n_col.mean(window=kd.windows.Sliding(2, m_col > 10))
    fields = {"m": m_col, "mean_m": mean_m, "n": n_col, "mean_n": mean_n}
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_mean_since_true(source, golden) -> None:
    """Compare `mean` of `m` in a `Since(True)` window against the golden JSONL."""
    # Expectation carried over from the original test: with `Since(True)` the
    # aggregate should simply equal the original value of `m`.
    m_raw = source.col("m")
    m_agg = source.col("m").mean(window=kd.windows.Since(True))
    golden.jsonl(kd.record({"m": m_raw, "m_mean": m_agg}))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import kaskada as kd | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture(scope="module")
def source() -> kd.sources.CsvString:
    """Build the module-scoped CSV source shared by the `min` tests.

    The data has a header plus six rows over keys ``A`` and ``B``, with
    columns ``time``, ``key``, ``m`` and ``n``. Some ``m``/``n`` cells are
    intentionally left empty (presumably read as nulls -- confirm against
    the CSV reader).
    """
    rows = (
        "time,key,m,n",
        "1996-12-19T16:39:57,A,5,10",
        "1996-12-19T16:39:58,B,24,3",
        "1996-12-19T16:39:59,A,17,6",
        "1996-12-19T16:40:00,A,,9",
        "1996-12-19T16:40:01,A,12,",
        "1996-12-19T16:40:02,A,,",
    )
    csv_text = "\n".join(rows)
    return kd.sources.CsvString(
        csv_text,
        time_column_name="time",
        key_column_name="key",
    )
|
||
|
||
def test_min_unwindowed(source, golden) -> None:
    """Compare `min()` of `m` and `n` with no window against the golden JSONL."""
    m_col = source.col("m")
    n_col = source.col("n")
    fields = {
        "m": m_col,
        "min_m": m_col.min(),
        "n": n_col,
        "min_n": n_col.min(),
    }
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_min_windowed(source, golden) -> None:
    """Compare windowed `min` results against the golden JSONL.

    `m` uses a `Since(m > 20)` window; `n` uses a `Sliding(2, m > 10)` window.
    """
    m_col = source.col("m")
    n_col = source.col("n")
    min_m = m_col.min(window=kd.windows.Since(m_col > 20))
    min_n = n_col.min(window=kd.windows.Sliding(2, m_col > 10))
    fields = {"m": m_col, "min_m": min_m, "n": n_col, "min_n": min_n}
    golden.jsonl(kd.record(fields))
|
||
|
||
def test_min_since_true(source, golden) -> None:
    """Compare `min` of `m` in a `Since(True)` window against the golden JSONL."""
    # Expectation carried over from the original test: with `Since(True)` the
    # aggregate should simply equal the original value of `m`.
    m_raw = source.col("m")
    m_agg = source.col("m").min(window=kd.windows.Since(True))
    golden.jsonl(kd.record({"m": m_raw, "m_min": m_agg}))
Oops, something went wrong.