Skip to content

Commit

Permalink
fix webdriver stf
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx committed Feb 11, 2025
1 parent cc2e87c commit 7c8e886
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions pipelines/datasets/br_stf_corte_aberta/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,47 @@
from pipelines.datasets.br_stf_corte_aberta.constants import constants as stf_constants
from pipelines.utils.utils import log
from selenium.webdriver.firefox.options import Options
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

def web_scrapping():
    """Download the STF decisions export into ``STF_INPUT`` using headless Chrome.

    Creates the ``stf_constants.STF_INPUT`` directory when it does not exist,
    starts a headless Chrome session whose default download directory is that
    folder, opens the STF transparency "decisoes" page and clicks the
    ``EXPORT-BUTTON-PADRAO`` element to trigger the download.

    The fixed ``time.sleep`` pauses give the page time to render and the
    download time to complete; the function does not verify that a file was
    actually written.

    Returns:
        None. The only result is whatever the browser saves into
        ``STF_INPUT``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the export button is
            not clickable within 180 seconds.
    """
    log("Criando as pastas")
    if not os.path.exists(stf_constants.STF_INPUT.value):
        os.mkdir(stf_constants.STF_INPUT.value)

    options = webdriver.ChromeOptions()
    # https://github.com/SeleniumHQ/selenium/issues/11637
    prefs = {
        "download.default_directory": stf_constants.STF_INPUT.value,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    }
    options.add_experimental_option(
        "prefs",
        prefs,
    )
    options.add_argument("--headless=new")
    options.add_argument("--test-type")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-first-run")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-default-browser-check")
    options.add_argument("--ignore-certificate-errors")
    options.add_argument("--start-maximized")
    options.add_argument(
        "user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    )

    # Exactly one browser is started: creating a second driver and rebinding
    # ``driver`` would leave the first session running with no handle to quit it.
    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()),
        options=options,
    )
    try:
        driver.get("https://transparencia.stf.jus.br/extensions/decisoes/decisoes.html")
        # Fixed pause for the initial page load before touching the window.
        time.sleep(30)
        driver.maximize_window()
        # Fixed pause before looking for the export button.
        time.sleep(45)
        WebDriverWait(driver, 180).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="EXPORT-BUTTON-PADRAO"]'))
        ).click()
        # Give the download time to finish before the browser is closed.
        time.sleep(30)
    finally:
        # Always shut the browser down, even when the wait above times out,
        # so failed runs do not leave a Chrome process behind.
        driver.quit()


Expand Down

0 comments on commit 7c8e886

Please sign in to comment.