From e6781c822203acd33bb59412a0b426cce3143a46 Mon Sep 17 00:00:00 2001
From: Waldo Jaquith
Date: Thu, 21 Nov 2024 21:38:55 -0500
Subject: [PATCH] Change to fetching files via HTTPS

The era of FTP is over, long live HTTPS. DLAS now provides
non-authenticated download of bulk data via HTTPS. Switch to fetching
files that way.

The fetch script fails on HTTP errors instead of saving the error page
as a CSV, and only moves a download into place once it has completed,
so the importer never sees a partial or bogus file.
---
 cron/fetch_csv.sh          | 33 +++++++++++++++++++++++++++++++++
 cron/sftp.sh               | 10 ----------
 deploy/crontab.txt         | 14 +++++++-------
 deploy/sftp_credentials.sh |  9 ++-------
 4 files changed, 42 insertions(+), 24 deletions(-)
 create mode 100755 cron/fetch_csv.sh
 delete mode 100755 cron/sftp.sh

diff --git a/cron/fetch_csv.sh b/cron/fetch_csv.sh
new file mode 100755
--- /dev/null
+++ b/cron/fetch_csv.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Download one LIS bulk-data file over HTTPS.
+#
+# Usage: fetch_csv.sh <remote filename> <local output path>
+#
+# The session ID placeholder in the URL below is filled in at deploy time by
+# deploy/sftp_credentials.sh.
+
+set -x
+set -euo pipefail
+
+if [[ $# -ne 2 ]]; then
+  echo "Usage: ${0##*/} <remote filename> <local output path>" >&2
+  exit 2
+fi
+
+filename="$1"
+output_filename="$2"
+
+# Download to a scratch file next to the destination and move it into place
+# only on success, so an HTTP error or an interrupted transfer never replaces
+# the last good copy and a reader never sees a half-written file.
+partial_filename="${output_filename}.part"
+trap 'rm -f -- "$partial_filename"' EXIT
+
+# --fail makes curl exit non-zero on an HTTP error instead of saving the
+# error page as if it were the CSV.
+curl --fail --silent --show-error \
+  -o "$partial_filename" \
+  "https://lis.blob.core.windows.net/lisfiles/20{SESSION_LIS_ID}/${filename}"
+
+mv -- "$partial_filename" "$output_filename"
diff --git a/cron/sftp.sh b/cron/sftp.sh
deleted file mode 100755
index b62c782..0000000
--- a/cron/sftp.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/expect
-
-set filename [lindex $argv 0]
-set output_filename [lindex $argv 1]
-
-# Seriously, the path contains "CSV221," no matter hwhat the year or session
-spawn sftp {LIS_FTP_USERNAME}@sftp.dlas.virginia.gov:/CSV221/csv{SESSION_LIS_ID}/$filename $output_filename
-expect "password"
-send "{LIS_FTP_PASSWORD}\n"
-expect eof
diff --git a/deploy/crontab.txt b/deploy/crontab.txt
index 1b15195..6d98827 100644
--- a/deploy/crontab.txt
+++ b/deploy/crontab.txt
@@ -1,14 +1,14 @@
 # The core update script. This runs hourly at 55 after, 4 minutes after DLAS outputs their data
-55 6-23 * * * /usr/bin/expect rs-machine/cron/sftp.sh BILLS.CSV rs-machine/cron/bills.csv
-55 6-23 * * * /usr/bin/expect rs-machine/cron/sftp.sh Committees.csv rs-machine/cron/committees.csv
+55 6-23 * * * /bin/bash rs-machine/cron/fetch_csv.sh BILLS.CSV rs-machine/cron/bills.csv
+55 6-23 * * * /bin/bash rs-machine/cron/fetch_csv.sh Committees.csv rs-machine/cron/committees.csv
 55 6-23 * * * /usr/bin/php rs-machine/cron/update.php
 
 # The update script that handles the advancing of bills. This runs hourly at 2 after, 2 minutes after DLAS outputs their data.
-02 6-23 * * * /usr/bin/expect rs-machine/cron/sftp.sh HISTORY.CSV rs-machine/cron/history.csv
+02 6-23 * * * /bin/bash rs-machine/cron/fetch_csv.sh HISTORY.CSV rs-machine/cron/history.csv
 02 6-23 * * * /usr/bin/php rs-machine/cron/update.php history
 
 # The daily script that collects and tallies votes. This runs at 3:00, an hour after DLAS outputs their data.
-0 3 * * * /usr/bin/expect rs-machine/cron/sftp.sh VOTE.CSV rs-machine/cron/vote.csv
+0 3 * * * /bin/bash rs-machine/cron/fetch_csv.sh VOTE.CSV rs-machine/cron/vote.csv
 0 3 * * * /usr/bin/php rs-machine/cron/update.php vote
 
 # Collects scheduled meetings and hearings.
@@ -18,7 +18,7 @@
 0 16,20 * * * /usr/bin/php rs-machine/cron/update.php minutes
 
 # Update the bill summaries (which is updated daily at 11:53).
-59 11 * * * /usr/bin/expect rs-machine/cron/sftp.sh Summaries.csv rs-machine/cron/summaries.csv
+59 11 * * * /bin/bash rs-machine/cron/fetch_csv.sh Summaries.csv rs-machine/cron/summaries.csv
 59 11 * * * /usr/bin/php rs-machine/cron/update.php summaries
 
 # Update the bill full text.
@@ -37,14 +37,14 @@
 41 02,16 * * * /usr/bin/php rs-machine/cron/update.php download_pdfs
 
 # Update the legislators listing
-40 12 * * * /usr/bin/expect rs-machine/cron/sftp.sh Members.csv rs-machine/cron/members.csv
+40 12 * * * /bin/bash rs-machine/cron/fetch_csv.sh Members.csv rs-machine/cron/members.csv
 41 12 * * * /usr/bin/php rs-machine/cron/update.php legislators
 
 # Updates the sections of the code referred to within each bill.
 5,15,25,35,45,55 * * 1-3,10-12 * /usr/bin/php rs-machine/cron/update.php code_sections
 
 # Update the fiscal_impact statements
-52 11,13,15,17,19,21 * * * /usr/bin/expect rs-machine/cron/sftp.sh FiscalImpactStatements.csv rs-machine/cron/FiscalImpactStatements.csv
+52 11,13,15,17,19,21 * * * /bin/bash rs-machine/cron/fetch_csv.sh FiscalImpactStatements.csv rs-machine/cron/FiscalImpactStatements.csv
 53 11,13,15,17,19,21 * * * /usr/bin/php rs-machine/cron/update.php fiscal_impact
 
 # Re-zip the current year's downloadable bill full-text HTML.
diff --git a/deploy/sftp_credentials.sh b/deploy/sftp_credentials.sh
--- a/deploy/sftp_credentials.sh
+++ b/deploy/sftp_credentials.sh
@@ -1,10 +1,5 @@
 #!/bin/bash
 
-# Set up FTP credentials in the SFTP script.
-sed -i -e "s|{LIS_FTP_PASSWORD}|${LIS_FTP_PASSWORD}|g" cron/sftp.sh
-sed -i -e "s|{LIS_FTP_USERNAME}|${LIS_FTP_USERNAME}|g" cron/sftp.sh
-
-# Put the session ID into the SFTP path, so we fetch the correct LIS CSV
+# Put the session ID into the download URL, so we fetch the correct LIS CSV
 SESSION_LIS_ID=$(grep -oP "SESSION_LIS_ID', '\K([0-9]{3})" includes/settings.inc.php)
-sed -i -e "s|{SESSION_LIS_ID}|${SESSION_LIS_ID}|g" cron/sftp.sh
-
+sed -i -e "s|{SESSION_LIS_ID}|${SESSION_LIS_ID}|g" cron/fetch_csv.sh