From f3d7a1113f3a63234fda774b3e2f04da53061769 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Wed, 1 Nov 2023 09:10:17 -0400 Subject: [PATCH 1/2] Switch regex strategy to sampling strategy to improve performance --- test/unit/io_managers_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/io_managers_test.py b/test/unit/io_managers_test.py index a4ba43efc2..ce9fe14865 100644 --- a/test/unit/io_managers_test.py +++ b/test/unit/io_managers_test.py @@ -354,7 +354,7 @@ def test_ferc_xbrl_sqlite_io_manager_dedupes(mocker, tmp_path): example_schema = pandera.DataFrameSchema( { "entity_id": pandera.Column( - str, pandera.Check.str_matches(r"[0-9a-zA-Z]+"), nullable=False + str, pandera.Check.isin(r"C0123456789"), nullable=False ), "date": pandera.Column("datetime64[ns]", nullable=False), "utility_type": pandera.Column( @@ -365,7 +365,7 @@ def test_ferc_xbrl_sqlite_io_manager_dedupes(mocker, tmp_path): "publication_time": pandera.Column("datetime64[ns]", nullable=False), "int_factoid": pandera.Column(int), "float_factoid": pandera.Column(float), - "str_factoid": pandera.Column("str"), + "str_factoid": pandera.Column(str), } ) From e5138a69745a78320c7aff1c029446550dd98067 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Wed, 1 Nov 2023 11:54:53 -0400 Subject: [PATCH 2/2] Increase deadline --- test/unit/io_managers_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/io_managers_test.py b/test/unit/io_managers_test.py index ce9fe14865..89d8a80988 100644 --- a/test/unit/io_managers_test.py +++ b/test/unit/io_managers_test.py @@ -354,7 +354,7 @@ def test_ferc_xbrl_sqlite_io_manager_dedupes(mocker, tmp_path): example_schema = pandera.DataFrameSchema( { "entity_id": pandera.Column( - str, pandera.Check.isin(r"C0123456789"), nullable=False + str, pandera.Check.isin("C0123456789"), nullable=False ), "date": pandera.Column("datetime64[ns]", nullable=False), "utility_type": pandera.Column( @@ -370,6 +370,7 @@ def test_ferc_xbrl_sqlite_io_manager_dedupes(mocker, tmp_path): ) +@hypothesis.settings(print_blob=True, deadline=400) @hypothesis.given(example_schema.strategy(size=3)) def test_filter_for_freshest_data(df): # XBRL context is the identifying metadata for reported values