From 011dfd4a0ad88c909b857884b2c993155fcf7e06 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 14:09:30 -0300 Subject: [PATCH 01/13] fix click in button download stf --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 733661a7e..4e826b4b6 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -41,7 +41,7 @@ def web_scrapping(): time.sleep(10) driver.maximize_window() time.sleep(15) - WebDriverWait(driver, 60).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="EXPORT-BUTTON-2"]/button'))).click() + WebDriverWait(driver, 60).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="EXPORT-BUTTON-PADRAO"]'))).click() time.sleep(15) driver.quit() From 77588162265c3e43feaaf45753b0f6b334ddc19e Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 14:37:49 -0300 Subject: [PATCH 02/13] fix before assignment --- .../datasets/br_stf_corte_aberta/utils.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 4e826b4b6..40f6c4525 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -132,14 +132,14 @@ def check_for_data(): if arquivo.endswith(".csv"): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) - df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] - data_obj = df["Data da decisão"] = ( - df["Data da decisão"].astype(str).str[6:10] - + "-" - + df["Data da decisão"].astype(str).str[3:5] - + "-" - + df["Data da decisão"].astype(str).str[0:2] - ) - data_obj = data_obj.max() - - return data_obj + df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] + data_obj = df["Data da decisão"] = ( + df["Data da decisão"].astype(str).str[6:10] + + "-" + + df["Data da decisão"].astype(str).str[3:5] + + "-" + + df["Data da decisão"].astype(str).str[0:2] + ) + data_obj = data_obj.max() + + return data_obj \ No newline at end of file From 3f9cee43f68ec43d5fe85d02dfaea2371a7774e7 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 17:28:45 -0300 Subject: [PATCH 03/13] fix get files in input --- .../datasets/br_stf_corte_aberta/utils.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 40f6c4525..bcdd3120b 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -129,17 +129,20 @@ def check_for_data(): arquivos = os.listdir(stf_constants.STF_INPUT.value) log(arquivos) for arquivo in arquivos: - if arquivo.endswith(".csv"): - df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + try: + if arquivo.endswith(".xlsx") or arquivo.endswith(".csv"): + df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + except FileNotFoundError as error: + log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") + + df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] + data_obj = df["Data da decisão"] = ( + df["Data da decisão"].astype(str).str[6:10] + + "-" + + df["Data da decisão"].astype(str).str[3:5] + + "-" + + df["Data da decisão"].astype(str).str[0:2] + ) + data_obj = data_obj.max() - df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] - data_obj = df["Data da decisão"] = ( - df["Data da decisão"].astype(str).str[6:10] - + "-" - + df["Data da decisão"].astype(str).str[3:5] - + "-" - + df["Data da decisão"].astype(str).str[0:2] - ) - data_obj = data_obj.max() - - return data_obj \ No newline at end of file + return data_obj \ No newline at end of file From 3525ddfb6a885272cedcc96e308a675db58eaa7a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 20:29:20 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index bcdd3120b..730fe2bc8 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -134,7 +134,7 @@ def check_for_data(): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) except FileNotFoundError as error: log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") - + df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] data_obj = df["Data da decisão"] = ( df["Data da decisão"].astype(str).str[6:10] From 56a48b704b31136f03606f370aa48b69afaf1441 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 17:34:22 -0300 Subject: [PATCH 05/13] fix read file --- pipelines/datasets/br_stf_corte_aberta/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 730fe2bc8..0ebcdc414 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -130,7 +130,9 @@ def check_for_data(): log(arquivos) for arquivo in arquivos: try: - if arquivo.endswith(".xlsx") or arquivo.endswith(".csv"): + if arquivo.endswith(".xlsx"): + df = pd.read_excel(stf_constants.STF_INPUT.value + arquivo, dtype=str) + elif arquivo.endswith(".csv"): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) except FileNotFoundError as error: log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") From f7156d3900d9a19bb020cfcfd3a14d79104b193a Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 18:07:23 -0300 Subject: [PATCH 06/13] fix columns date --- pipelines/datasets/br_stf_corte_aberta/utils.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 0ebcdc414..69606b1e4 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -57,16 +57,12 @@ def read_csv(): def fix_columns_data(df): - lista = ["Data de autuação", "Data da decisão", "Data baixa"] + lista = ["Data de autuação", "Data da decisão", "Data baixa", "Observação do andamento"] for x in lista: - df[x] = df[x].astype(str).str[0:10] - df[x] = ( - df[x].astype(str).str[6:10] - + "-" - + df[x].astype(str).str[3:5] - + "-" - + df[x].astype(str).str[0:2] - ) + if len(df[x]) == 1: + df[x] = df[x].astype(str).replace("-", '') + df[x] = df[x].astype(str).replace("/", "-") + return df From 39d29f5aff4cfe7322e00ab266cb6c35624be0f7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:08:05 +0000 Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 69606b1e4..c24d0917d 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -62,7 +62,7 @@ def fix_columns_data(df): if len(df[x]) == 1: df[x] = df[x].astype(str).replace("-", '') df[x] = df[x].astype(str).replace("/", "-") - + return df From 0f512f52bc204547310da2b332795fa35f519a50 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 18:29:34 -0300 Subject: [PATCH 08/13] fix columns date max --- pipelines/datasets/br_stf_corte_aberta/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index c24d0917d..e3a7d8f2e 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -51,8 +51,13 @@ def read_csv(): log("Verificando dados dentro do container") log(arquivos) for arquivo in arquivos: - if arquivo.endswith(".csv"): - df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + try: + if arquivo.endswith(".xlsx"): + df = pd.read_excel(stf_constants.STF_INPUT.value + arquivo, dtype=str) + elif arquivo.endswith(".csv"): + df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + except FileNotFoundError as error: + log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") return df @@ -134,13 +139,7 @@ def check_for_data(): log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] - data_obj = df["Data da decisão"] = ( - df["Data da decisão"].astype(str).str[6:10] - + "-" - + df["Data da decisão"].astype(str).str[3:5] - + "-" - + df["Data da decisão"].astype(str).str[0:2] - ) + data_obj = df["Data da decisão"].astype(str).replace("/", "-") data_obj = data_obj.max() return data_obj \ No newline at end of file From a225a8f390c4c56775db61054a8aa644b93ddffd Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 18:56:24 -0300 Subject: [PATCH 09/13] value counts() --- pipelines/datasets/br_stf_corte_aberta/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index e3a7d8f2e..b4dcb4b28 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -67,7 +67,8 @@ def fix_columns_data(df): if len(df[x]) == 1: df[x] = df[x].astype(str).replace("-", '') df[x] = df[x].astype(str).replace("/", "-") - + log(df[x].value_counts()) + return df From 237fce98e3c8f977f10e6c9b86d026a2b82cae48 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:58:49 +0000 Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index b4dcb4b28..313f58682 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -68,7 +68,7 @@ def fix_columns_data(df): df[x] = df[x].astype(str).replace("-", '') df[x] = df[x].astype(str).replace("/", "-") log(df[x].value_counts()) - + return df From ba1e7b16e339fb161931d12da25cb88f87e13e72 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 20:01:39 -0300 Subject: [PATCH 11/13] fix data autuacao --- pipelines/datasets/br_stf_corte_aberta/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 313f58682..1ab68b6b7 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -62,12 +62,12 @@ def read_csv(): def fix_columns_data(df): - lista = ["Data de autuação", "Data da decisão", "Data baixa", "Observação do andamento"] - for x in lista: - if len(df[x]) == 1: - df[x] = df[x].astype(str).replace("-", '') - df[x] = df[x].astype(str).replace("/", "-") - log(df[x].value_counts()) + lista = ["Data de autuação", "Data da decisão", "Data baixa"] + for x in lista: + df[x] = df[x].astype(str) + if len(df[x]) == 1: + df[x] = df[x].replace("-", '') + df[x] = df[x].replace("/", "-").astype(str) return df From a3777be23e9cb8498224d3824f8abe6e8b908691 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 20:12:58 -0300 Subject: [PATCH 12/13] register flow --- pipelines/datasets/br_stf_corte_aberta/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 1ab68b6b7..c66270c92 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -68,7 +68,6 @@ def fix_columns_data(df): if len(df[x]) == 1: df[x] = df[x].replace("-", '') df[x] = df[x].replace("/", "-").astype(str) - return df From dfad87f8fc0219185ebbc35a0cf5a113cbcb2d4e Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 20:23:58 -0300 Subject: [PATCH 13/13] register flow part 2 --- pipelines/datasets/br_stf_corte_aberta/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index c66270c92..3499b756f 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -63,11 +63,12 @@ def read_csv(): def fix_columns_data(df): lista = ["Data de autuação", "Data da decisão", "Data baixa"] - for x in lista: - df[x] = df[x].astype(str) - if len(df[x]) == 1: - df[x] = df[x].replace("-", '') - df[x] = df[x].replace("/", "-").astype(str) + for x in lista: + df[x] = df[x].astype(str) + if len(df[x]) == 1: + df[x] = df[x].replace("-", '') + df[x] = df[x].replace("/", "-").astype(str) + log(df[x].value_counts()) return df