feat: add script to get annotations from errored actions (#383)
Rebecca Graber committed Aug 2, 2023
1 parent fa3f8a4 commit 2ccf0fd
Showing 2 changed files with 139 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CHANGELOG.rst
@@ -13,7 +13,7 @@ Change Log
Unreleased
~~~~~~~~~~
-*
+* Add script to get github action errors

[2.0.0] - 2023-06-01
~~~~~~~~~~~~~~~~~~~~
138 changes: 138 additions & 0 deletions edx_arch_experiments/scripts/get_action_errors.py
@@ -0,0 +1,138 @@
"""
Script to get the annotations from all failed checks in edx-platform after a given date
Gets all the commits to master after the date, then for each commit gets each check suite, then for each failed check
suite gets each run. Collects the annotations for all the failed runs. The annotations will sometimes contain useful
error messages, sometimes just the exit code. Getting the full logs requires admin permissions to edx-platform so it's
not included in this script.
Example output row:
commit_date,run_started_at,run_completed_at,commit_hash,name,message
2023-07-26T20:59:01Z,2023-07-27T06:56:23Z,2023-07-27T07:01:58Z,06e738e64a3485ecec037a9b8a36cf4ae145ea8a,
upgrade-one-python-dependency-workflow,Process completed with exit code 2.
This script takes a pretty long time to run (15m for 2 months) and there is a risk if you look too far back you will hit
your API limit.
"""

from csv import DictWriter
from datetime import datetime

import click
import requests


@click.command()
@click.option('--token', envvar='GITHUB_TOKEN')
@click.option('--start_date', type=click.DateTime(formats=["%Y-%m-%d"]), help="Date of earliest commit")
@click.option('--filename', help="Where to write the data")
def get_errors_from_date(token, start_date, filename):
    """
    Creates a csv documenting the annotations from all failed runs for commits to edx-platform after the given date.

    Parameters:
        token (string): The GitHub API token. Read from the GITHUB_TOKEN environment variable
        start_date (date): The earliest commit date to look for
        filename (string): Where to write the csv
    """
    headers = {'Authorization': f"Bearer {token}"}
    all_commits_after_date = get_commits_after_date(start_date, headers=headers)
    all_check_suites = []
    all_rows = []
    for commit in all_commits_after_date:
        # gather all the check suite data from each commit into a single list
        add_commit_check_suites(commit, all_check_suites, headers)
    for check_suite in all_check_suites:
        # only record annotations for failed runs
        if check_suite['conclusion'] == 'failure':
            check_runs = requests.get(check_suite['check_runs_url'], headers=headers).json()
            for run in check_runs['check_runs']:
                if run['conclusion'] == 'failure' and run['output']['annotations_count'] > 0:
                    annotations = requests.get(run['output']['annotations_url'], headers=headers).json()
                    for annotation in annotations:
                        all_rows.append({
                            'commit_hash': run['head_sha'],
                            'name': run['name'],
                            'message': annotation['message'],
                            'run_started_at': run['started_at'],
                            'run_completed_at': run['completed_at'],
                            'commit_date': check_suite['commit_date'],
                        })

    # newline='' keeps the csv module from writing extra blank lines on Windows
    with open(filename, 'w', newline='') as f:
        writer = DictWriter(f, fieldnames=['commit_date', 'run_started_at', 'run_completed_at', 'commit_hash',
                                           'name', 'message'])
        writer.writeheader()
        writer.writerows(all_rows)


def get_commits_after_date(cut_off_date, headers):
    """
    Get API data for all commits to edx-platform/master after the given date.

    Parameters:
        cut_off_date (date): Earliest date to look
        headers (dict): Authentication headers for GitHub requests

    Returns:
        A list of the API responses for each commit after the date
    """
    base_url = "https://api.github.com/repos/openedx/edx-platform/commits?sha=master&per_page=100"
    # Keeps track of whether we've reached cut_off_date. The API returns commits ordered by date, descending.
    found_last = False
    all_commits_after_date = []
    page = 1
    while not found_last:
        page_url = f"{base_url}&page={page}"
        print(f"Fetching page {page_url}")
        response = requests.get(page_url, headers=headers)
        if response.status_code >= 400:
            print(response)
            break
        response_json = response.json()
        if len(response_json) == 0:
            break
        for single_commit in response_json:
            # Strip the trailing "Z", if present, since datetime.fromisoformat rejects it before Python 3.11
            commit_date = datetime.fromisoformat(single_commit['commit']['committer']['date'].replace("Z", ""))
            if commit_date < cut_off_date:
                found_last = True
                break
            all_commits_after_date.append(single_commit)
        page += 1
    return all_commits_after_date
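

def print_remaining_quota(headers):
    """
    Illustrative helper, not part of the original script: show the remaining core API quota.

    The module docstring warns about hitting the API rate limit on long runs; GitHub's /rate_limit
    endpoint reports how many requests remain, so this can be checked before starting one.
    """
    quota = requests.get("https://api.github.com/rate_limit", headers=headers).json()
    core = quota['resources']['core']
    print(f"API requests remaining: {core['remaining']} of {core['limit']}")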


def add_commit_check_suites(current_commit, current_suites, headers):
    """
    Add API information from all check suites performed for a given commit to the given list.

    Parameters:
        current_commit (dict): the API response for the commit to check
        current_suites (list): list to be extended
        headers (dict): Authentication headers for connecting to GitHub
    """
    sha = current_commit['sha']
    check_url = f"https://api.github.com/repos/openedx/edx-platform/commits/{sha}/check-suites?per_page=100"
    page = 1
    while True:
        # Keep going until we get an empty check_suites list or an error. An empty list means we've hit the last page.
        paginated_url = f"{check_url}&page={page}"
        print(f"Fetching page {paginated_url}")
        response = requests.get(paginated_url, headers=headers).json()
        if 'check_suites' not in response:
            print(response)
            break
        check_suites = response['check_suites']
        if len(check_suites) == 0:
            break
        # Attach the commit date to each suite so it can eventually be written to the csv
        current_suites.extend([{**s, 'commit_date': current_commit['commit']['committer']['date']}
                               for s in check_suites])
        page += 1


if __name__ == '__main__':
    get_errors_from_date()
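
Once the csv exists, a quick way to surface the noisiest checks is to tally the name column. A minimal
post-processing sketch (the filename action_errors.csv is just an example):

    from collections import Counter
    from csv import DictReader

    # Count how many failure annotations each check name produced
    with open('action_errors.csv') as f:
        failures = Counter(row['name'] for row in DictReader(f))
    for check_name, count in failures.most_common(10):
        print(f"{count:5d}  {check_name}")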
