From 44a070cbf0b161e667c9ba4f4d1d5b6fb0bbf15f Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 14:09:30 -0300 Subject: [PATCH 01/13] fix click in button download stf --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 733661a7e..4e826b4b6 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -41,7 +41,7 @@ def web_scrapping(): time.sleep(10) driver.maximize_window() time.sleep(15) - WebDriverWait(driver, 60).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="EXPORT-BUTTON-2"]/button'))).click() + WebDriverWait(driver, 60).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="EXPORT-BUTTON-PADRAO"]'))).click() time.sleep(15) driver.quit() From 91a66999a7cd682a67b410043d22b1a05157e270 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 14:37:49 -0300 Subject: [PATCH 02/13] fix before assignment --- .../datasets/br_stf_corte_aberta/utils.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 4e826b4b6..40f6c4525 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -132,14 +132,14 @@ def check_for_data(): if arquivo.endswith(".csv"): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) - df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] - data_obj = df["Data da decisão"] = ( - df["Data da decisão"].astype(str).str[6:10] - + "-" - + df["Data da decisão"].astype(str).str[3:5] - + "-" - + df["Data da decisão"].astype(str).str[0:2] - ) - data_obj = data_obj.max() - - return data_obj + df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] + data_obj = df["Data da decisão"] = ( + df["Data da decisão"].astype(str).str[6:10] + + "-" + + df["Data da decisão"].astype(str).str[3:5] + + "-" + + df["Data da decisão"].astype(str).str[0:2] + ) + data_obj = data_obj.max() + + return data_obj \ No newline at end of file From 13141e66edff7b5c034080c262a43a27959d524b Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 17:28:45 -0300 Subject: [PATCH 03/13] fix get files in input --- .../datasets/br_stf_corte_aberta/utils.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 40f6c4525..bcdd3120b 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -129,17 +129,20 @@ def check_for_data(): arquivos = os.listdir(stf_constants.STF_INPUT.value) log(arquivos) for arquivo in arquivos: - if arquivo.endswith(".csv"): - df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + try: + if arquivo.endswith(".xlsx") or arquivo.endswith(".csv"): + df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + except FileNotFoundError as error: + log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") + + df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] + data_obj = df["Data da decisão"] = ( + df["Data da decisão"].astype(str).str[6:10] + + "-" + + df["Data da decisão"].astype(str).str[3:5] + + "-" + + df["Data da decisão"].astype(str).str[0:2] + ) + data_obj = data_obj.max() - df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] - data_obj = df["Data da decisão"] = ( - df["Data da decisão"].astype(str).str[6:10] - + "-" - + df["Data da decisão"].astype(str).str[3:5] - + "-" - + df["Data da decisão"].astype(str).str[0:2] - ) - data_obj = data_obj.max() - - return data_obj \ No newline at end of file + return data_obj \ No newline at end of file From cdc3c544489f348100b9c1d308e3f31e2d39299e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 20:29:20 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index bcdd3120b..730fe2bc8 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -134,7 +134,7 @@ def check_for_data(): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) except FileNotFoundError as error: log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") - + df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] data_obj = df["Data da decisão"] = ( df["Data da decisão"].astype(str).str[6:10] From 7cb890c6af2c8a4b1ab231fd81fb9594c8c0db74 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 17:34:22 -0300 Subject: [PATCH 05/13] fix read file --- pipelines/datasets/br_stf_corte_aberta/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 730fe2bc8..0ebcdc414 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -130,7 +130,9 @@ def check_for_data(): log(arquivos) for arquivo in arquivos: try: - if arquivo.endswith(".xlsx") or arquivo.endswith(".csv"): + if arquivo.endswith(".xlsx"): + df = pd.read_excel(stf_constants.STF_INPUT.value + arquivo, dtype=str) + elif arquivo.endswith(".csv"): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) except FileNotFoundError as error: log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") From eb13326eb678a5b60997f42b0318e6fc6e7439b0 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 18:07:23 -0300 Subject: [PATCH 06/13] fix columns date --- pipelines/datasets/br_stf_corte_aberta/utils.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 0ebcdc414..69606b1e4 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -57,16 +57,12 @@ def read_csv(): def fix_columns_data(df): - lista = ["Data de autuação", "Data da decisão", "Data baixa"] + lista = ["Data de autuação", "Data da decisão", "Data baixa", "Observação do andamento"] for x in lista: - df[x] = df[x].astype(str).str[0:10] - df[x] = ( - df[x].astype(str).str[6:10] - + "-" - + df[x].astype(str).str[3:5] - + "-" - + df[x].astype(str).str[0:2] - ) + if len(df[x]) == 1: + df[x] = df[x].astype(str).replace("-", '') + df[x] = df[x].astype(str).replace("/", "-") + return df From 8a5b525f859c8f6c94c33308687756c307569777 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:08:05 +0000 Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 69606b1e4..c24d0917d 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -62,7 +62,7 @@ def fix_columns_data(df): if len(df[x]) == 1: df[x] = df[x].astype(str).replace("-", '') df[x] = df[x].astype(str).replace("/", "-") - + return df From 67c8420cf6149b28af9e7fea112f83e924c0971a Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 18:29:34 -0300 Subject: [PATCH 08/13] fix columns date max --- pipelines/datasets/br_stf_corte_aberta/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index c24d0917d..e3a7d8f2e 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -51,8 +51,13 @@ def read_csv(): log("Verificando dados dentro do container") log(arquivos) for arquivo in arquivos: - if arquivo.endswith(".csv"): - df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + try: + if arquivo.endswith(".xlsx"): + df = pd.read_excel(stf_constants.STF_INPUT.value + arquivo, dtype=str) + elif arquivo.endswith(".csv"): + df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) + except FileNotFoundError as error: + log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") return df @@ -134,13 +139,7 @@ def check_for_data(): log(f"Arquivo não encontrado! Verificando o input: {stf_constants.STF_INPUT.value + arquivo}") df["Data da decisão"] = df["Data da decisão"].astype(str).str[0:10] - data_obj = df["Data da decisão"] = ( - df["Data da decisão"].astype(str).str[6:10] - + "-" - + df["Data da decisão"].astype(str).str[3:5] - + "-" - + df["Data da decisão"].astype(str).str[0:2] - ) + data_obj = df["Data da decisão"].astype(str).replace("/", "-") data_obj = data_obj.max() return data_obj \ No newline at end of file From 44184223f2ff7b3fe67458d776dbfc458906012d Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 18:56:24 -0300 Subject: [PATCH 09/13] value counts() --- pipelines/datasets/br_stf_corte_aberta/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index e3a7d8f2e..b4dcb4b28 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -67,7 +67,8 @@ def fix_columns_data(df): if len(df[x]) == 1: df[x] = df[x].astype(str).replace("-", '') df[x] = df[x].astype(str).replace("/", "-") - + log(df[x].value_counts()) + return df From 226fa28e09d2b923b71663bb8dbc974864cd3d7d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 21:58:49 +0000 Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/datasets/br_stf_corte_aberta/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index b4dcb4b28..313f58682 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -68,7 +68,7 @@ def fix_columns_data(df): df[x] = df[x].astype(str).replace("-", '') df[x] = df[x].astype(str).replace("/", "-") log(df[x].value_counts()) - + return df From 178e7cee69fa5c93e965bf440d4eeddaa0a40c64 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 20:01:39 -0300 Subject: [PATCH 11/13] fix data autuacao --- pipelines/datasets/br_stf_corte_aberta/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 313f58682..1ab68b6b7 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -62,12 +62,12 @@ def read_csv(): def fix_columns_data(df): - lista = ["Data de autuação", "Data da decisão", "Data baixa", "Observação do andamento"] - for x in lista: - if len(df[x]) == 1: - df[x] = df[x].astype(str).replace("-", '') - df[x] = df[x].astype(str).replace("/", "-") - log(df[x].value_counts()) + lista = ["Data de autuação", "Data da decisão", "Data baixa"] + for x in lista: + df[x] = df[x].astype(str) + if len(df[x]) == 1: + df[x] = df[x].replace("-", '') + df[x] = df[x].replace("/", "-").astype(str) return df From 7eeb9cb471c2ff974eee7799229db1de34c5e594 Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 20:12:58 -0300 Subject: [PATCH 12/13] register flow --- pipelines/datasets/br_stf_corte_aberta/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 1ab68b6b7..c66270c92 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -68,7 +68,6 @@ def fix_columns_data(df): if len(df[x]) == 1: df[x] = df[x].replace("-", '') df[x] = df[x].replace("/", "-").astype(str) - return df From 53776cc696cafd2f7ee27efda3357a7d8436f94c Mon Sep 17 00:00:00 2001 From: tricktx Date: Thu, 2 Jan 2025 20:23:58 -0300 Subject: [PATCH 13/13] register flow part 2 --- pipelines/datasets/br_stf_corte_aberta/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index c66270c92..3499b756f 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -63,11 +63,12 @@ def read_csv(): def fix_columns_data(df): lista = ["Data de autuação", "Data da decisão", "Data baixa"] - for x in lista: - df[x] = df[x].astype(str) - if len(df[x]) == 1: - df[x] = df[x].replace("-", '') - df[x] = df[x].replace("/", "-").astype(str) + for x in lista: + df[x] = df[x].astype(str) + if len(df[x]) == 1: + df[x] = df[x].replace("-", '') + df[x] = df[x].replace("/", "-").astype(str) + log(df[x].value_counts()) return df