From d8b297b0fc1889b08f6a7dc0dea8294766ebdd37 Mon Sep 17 00:00:00 2001 From: tricktx Date: Tue, 2 Jan 2024 12:46:14 -0300 Subject: [PATCH 1/6] Fix file renaming in read_csv function --- pipelines/datasets/br_stf_corte_aberta/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index adac225c7..4d6cf9d4a 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -46,7 +46,12 @@ def web_scrapping(): def read_csv(): - arquivos = os.listdir(stf_constants.STF_INPUT.value) + arquivos = [ + f + for f in os.listdir(stf_constants.STF_INPUT.value) + if f.endswith(".crdownload") + ] + os.rename(arquivos[0], "arquivo.csv") log("Verificando dados dentro do container") log(arquivos) for arquivo in arquivos: From 40afea963a34d8747593a14444fa25e6b639e036 Mon Sep 17 00:00:00 2001 From: tricktx Date: Tue, 2 Jan 2024 13:12:48 -0300 Subject: [PATCH 2/6] fix endswith --- pipelines/datasets/br_stf_corte_aberta/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 4d6cf9d4a..8b61a4966 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -47,9 +47,7 @@ def web_scrapping(): def read_csv(): arquivos = [ - f - for f in os.listdir(stf_constants.STF_INPUT.value) - if f.endswith(".crdownload") + f for f in os.listdir(stf_constants.STF_INPUT.value) if not f.endswith(".csv") ] os.rename(arquivos[0], "arquivo.csv") log("Verificando dados dentro do container") From 45ab037d60108ba554710dbe6a0d8711cf5b7802 Mon Sep 17 00:00:00 2001 From: tricktx Date: Tue, 2 Jan 2024 13:29:00 -0300 Subject: [PATCH 3/6] fix end two --- pipelines/datasets/br_stf_corte_aberta/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 8b61a4966..8c2d9d211 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -46,10 +46,7 @@ def web_scrapping(): def read_csv(): - arquivos = [ - f for f in os.listdir(stf_constants.STF_INPUT.value) if not f.endswith(".csv") - ] - os.rename(arquivos[0], "arquivo.csv") + arquivos = os.listdir(stf_constants.STF_INPUT.value) log("Verificando dados dentro do container") log(arquivos) for arquivo in arquivos: @@ -128,7 +125,10 @@ def check_for_data(): log("Iniciando web scrapping") web_scrapping() log("Iniciando o check for data") - arquivos = os.listdir(stf_constants.STF_INPUT.value) + arquivos = [ + f for f in os.listdir(stf_constants.STF_INPUT.value) if not f.endswith(".csv") + ] + os.rename(arquivos[0], "decisoes.csv") log(arquivos) for arquivo in arquivos: if arquivo.endswith(".csv"): From fab9473a8d579a6e9d758de622543a55a2908ac6 Mon Sep 17 00:00:00 2001 From: tricktx Date: Tue, 2 Jan 2024 13:51:31 -0300 Subject: [PATCH 4/6] fix end two --- pipelines/datasets/br_stf_corte_aberta/utils.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 8c2d9d211..08b49e40c 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -124,12 +124,17 @@ def partition_data(df: pd.DataFrame, column_name: list[str], output_directory: s def check_for_data(): log("Iniciando web scrapping") web_scrapping() + log("Iniciando o check for data") - arquivos = [ - f for f in os.listdir(stf_constants.STF_INPUT.value) if not f.endswith(".csv") - ] - os.rename(arquivos[0], "decisoes.csv") - log(arquivos) + arquivos = os.listdir(stf_constants.STF_INPUT.value) + log(f"LISTANDO OS ARQUIVOS {arquivos}") + + arquivos_csv = [arquivo for arquivo in arquivos if arquivo.endswith(".csv")] + log(f"ARQUIVO_CSV = {arquivos_csv}") + if arquivos_csv: + os.rename(arquivos_csv[0], "decisoes.csv") + print(f"Arquivo renomeado para decisoes.csv: {arquivos_csv[0]}") + log(f" ARQUIVO FINAL {arquivos}") for arquivo in arquivos: if arquivo.endswith(".csv"): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) From e9e99140012ec0c5cbb626559797c6e59a3d8a6f Mon Sep 17 00:00:00 2001 From: tricktx Date: Tue, 2 Jan 2024 16:13:51 -0300 Subject: [PATCH 5/6] time sleep --- pipelines/datasets/br_stf_corte_aberta/utils.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index 08b49e40c..c1f75519e 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -42,7 +42,8 @@ def web_scrapping(): driver.maximize_window() time.sleep(30) driver.find_element("xpath", '//*[@id="EXPORT-BUTTON-2"]/button').click() - time.sleep(30) + time.sleep(90) + driver.quit() def read_csv(): @@ -124,17 +125,9 @@ def partition_data(df: pd.DataFrame, column_name: list[str], output_directory: s def check_for_data(): log("Iniciando web scrapping") web_scrapping() - log("Iniciando o check for data") arquivos = os.listdir(stf_constants.STF_INPUT.value) - log(f"LISTANDO OS ARQUIVOS {arquivos}") - - arquivos_csv = [arquivo for arquivo in arquivos if arquivo.endswith(".csv")] - log(f"ARQUIVO_CSV = {arquivos_csv}") - if arquivos_csv: - os.rename(arquivos_csv[0], "decisoes.csv") - print(f"Arquivo renomeado para decisoes.csv: {arquivos_csv[0]}") - log(f" ARQUIVO FINAL {arquivos}") + log(arquivos) for arquivo in arquivos: if arquivo.endswith(".csv"): df = pd.read_csv(stf_constants.STF_INPUT.value + arquivo, dtype=str) From fa78471ce597a388db87ba5a7abe622a0aff5c32 Mon Sep 17 00:00:00 2001 From: tricktx Date: Tue, 2 Jan 2024 17:28:49 -0300 Subject: [PATCH 6/6] increase time --- pipelines/datasets/br_stf_corte_aberta/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/datasets/br_stf_corte_aberta/utils.py b/pipelines/datasets/br_stf_corte_aberta/utils.py index c1f75519e..c38dcf32a 100644 --- a/pipelines/datasets/br_stf_corte_aberta/utils.py +++ b/pipelines/datasets/br_stf_corte_aberta/utils.py @@ -36,11 +36,11 @@ def web_scrapping(): options.add_argument("--crash-dumps-dir=/tmp") options.add_argument("--remote-debugging-port=9222") driver = webdriver.Chrome(options=options) - time.sleep(30) + time.sleep(45) driver.get(stf_constants.STF_LINK.value) - time.sleep(30) + time.sleep(45) driver.maximize_window() - time.sleep(30) + time.sleep(45) driver.find_element("xpath", '//*[@id="EXPORT-BUTTON-2"]/button').click() time.sleep(90) driver.quit()