# Fetch Paginated JSON and Update CSV #15
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name and the events that start a run.
name: Fetch Paginated JSON and Update CSV

on:
  schedule:
    - cron: "0 0 * * *"  # Runs once a day at midnight (UTC)
  workflow_dispatch:  # Allows for manual runs as well
  push:
    branches:
      - main

jobs:
  # Single job: download every page of the API listing into data.json,
  # turn the "name" field of each record into schools.csv, and commit
  # that file back to the repository.
  update_csv:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access
    # to the repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      # NOTE(review): none of the steps in this job invoke a csvkit
      # command; confirm whether csvkit (and the Python setup above) is
      # still needed before removing it.
      - name: Install jq and csvkit
        run: |
          sudo apt-get update
          sudo apt-get install -y jq
          pip install csvkit

      - name: Fetch paginated JSON data from API
        env:
          API_URL: ${{ secrets.API_URL }}
          API_TOKEN: ${{ secrets.API_TOKEN }}
        run: |
          # Stop on the first failing command, unset variable, or failing
          # pipeline stage instead of carrying on with partial data.
          set -euo pipefail

          # Initialize page and limit parameters
          PAGE=1
          LIMIT=250
          AGGREGATE_FILE=data.json
          echo "[]" > "$AGGREGATE_FILE"  # Initialize an empty JSON array

          while true; do
            # Fetch a page of data from the API.
            # -G turns the --data-urlencode pairs into the URL query
            # string; without it curl sends them as a request body, which
            # a GET endpoint may ignore (the page would never advance).
            # --fail makes HTTP error statuses a curl failure and -S
            # keeps the error message visible despite -s.
            if ! RESPONSE=$(curl -sS --fail -G \
                -H "Authorization: Bearer $API_TOKEN" \
                --data-urlencode "page=$PAGE" \
                --data-urlencode "limit=$LIMIT" \
                --data-urlencode "verified[eq]=true" \
                "$API_URL"); then
              echo "Error: request for page $PAGE failed."
              exit 1
            fi

            # Check if the response is valid JSON and an array.
            # jq -e exits non-zero for invalid JSON, for a non-array
            # value, and for an empty body (no output at all).
            if ! echo "$RESPONSE" | jq -e 'type == "array"' > /dev/null 2>&1; then
              echo "Error: Expected an array but received a different data structure from the API."
              exit 1
            fi

            # An empty array marks the end of the listing.
            if [ "$(echo "$RESPONSE" | jq 'length')" -eq 0 ]; then
              echo "No more data to fetch. Exiting pagination loop."
              break
            fi

            # Append the page data to the aggregate JSON array. Kept as
            # two statements so a jq failure aborts the step instead of
            # being hidden on the left-hand side of "&&".
            jq -s '.[0] + .[1]' "$AGGREGATE_FILE" <(echo "$RESPONSE") > tmp.json
            mv tmp.json "$AGGREGATE_FILE"

            # Increment page number for the next request
            PAGE=$((PAGE + 1))
          done

      - name: Filter JSON to only include school names and save as schools.csv
        run: |
          # Add first line to CSV file
          echo "\"Below is a list of all schools that we've manually verified. If you see one that hasn't been included, please add it via my.mlh.io and we'll verify it soon.\"" > schools.csv
          # Wrap each "name" in an array to use @csv for correct CSV formatting
          jq -r '.[].name | [.] | @csv' data.json >> schools.csv

      - name: Commit and push CSV file
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          # "[skip ci]" in the message keeps this commit from starting
          # another push-triggered run.
          commit_message: 'Update schools CSV file from API data [skip ci]'
          file_pattern: schools.csv
          commit_user_name: 'github-actions[bot]'
          commit_user_email: 'github-actions[bot]@users.noreply.github.com'
          commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
          skip_fetch: true
          skip_checkout: true