Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add script to get annotations from errored actions #383

Merged
merged 6 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Change Log

Unreleased
~~~~~~~~~~
*
* Add script to get GitHub Actions errors

[2.0.0] - 2023-06-01
~~~~~~~~~~~~~~~~~~~~
Expand Down
133 changes: 133 additions & 0 deletions edx_arch_experiments/scripts/get_action_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""
Script to get the annotations from all failed checks in edx-platform after a given date

Gets all the commits to master after the date, then for each commit gets each check suite, then for each failed check
suite gets each run. Collects the annotations for all the failed runs. The annotations will sometimes contain useful
error messages, sometimes just the exit code. Getting the full logs requires admin permissions to edx-platform so it's
not included in this script.

This script takes a fairly long time to run (roughly 15 minutes for a 2-month window), and if you look too far back you
risk hitting your GitHub API rate limit.
"""

from csv import DictWriter
from datetime import datetime

import click
import requests


@click.command()
@click.option('--token', envvar='GITHUB_TOKEN')
@click.option('--start_date', type=click.DateTime(formats=["%Y-%m-%d"]), help="Date of earliest commit")
@click.option('--filename', help="Where to write the data")
def get_errors_from_date(token, start_date, filename):
    """
    Create a csv documenting the annotations from all failed runs for commits to edx-platform after the given date.

    For every commit returned by get_commits_after_date, the check suites are gathered with
    add_commit_check_suites. For each check suite whose conclusion is 'failure', its check runs are fetched, and
    for each failed run that reports at least one annotation, one csv row is written per annotation.

    Responses that do not have the expected shape are printed and skipped, so the rows collected so far are
    still written to the csv instead of being lost to an exception.

    Parameters:
        token (string): The GitHub API token. Retrieved from the env GITHUB_TOKEN variable
        start_date (date): The earliest date to look for
        filename (string): Where to write the csv
    """
    headers = {'Authorization': f"Bearer {token}"}
    all_commits_after_date = get_commits_after_date(start_date, headers=headers)

    # gather all the check suite data from each commit into a single list
    all_check_suites = []
    for commit in all_commits_after_date:
        add_commit_check_suites(commit, all_check_suites, headers)

    all_rows = []
    for check_suite in all_check_suites:
        # only record annotations for failed runs
        if check_suite['conclusion'] != 'failure':
            continue

        check_runs = requests.get(check_suite['check_runs_url'], headers=headers).json()
        if not isinstance(check_runs, dict) or 'check_runs' not in check_runs:
            # Not the expected payload (presumably an API error such as a rate limit); report it and move on
            # so that everything gathered so far still reaches the csv.
            print(check_runs)
            continue

        for run in check_runs['check_runs']:
            if run['conclusion'] != 'failure' or run['output']['annotations_count'] <= 0:
                continue

            annotations = requests.get(run['output']['annotations_url'], headers=headers).json()
            if not isinstance(annotations, list):
                # The loop below expects a list of annotation objects; anything else is reported and skipped.
                print(annotations)
                continue

            for annotation in annotations:
                all_rows.append({
                    'commit_hash': run['head_sha'],
                    'name': run['name'],
                    'message': annotation['message'],
                    'run_started_at': run['started_at'],
                    'run_completed_at': run['completed_at'],
                    'commit_date': check_suite['commit_date'],
                })

    # newline='' is what the csv module asks for when handing it a file object (otherwise extra blank rows can
    # appear on platforms that translate line endings); utf-8 keeps non-ASCII annotation messages writable
    # regardless of the locale's default encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        writer = DictWriter(
            csv_file,
            fieldnames=['commit_date', 'run_started_at', 'run_completed_at', 'commit_hash', 'name', 'message'],
        )
        writer.writeheader()
        writer.writerows(all_rows)


def get_commits_after_date(cut_off_date, headers):
    """
    Collect the API records of the edx-platform/master commits that are not older than the cut-off date.

    Pages through the commits endpoint 100 commits at a time and stops at the first commit whose committer
    date is earlier than cut_off_date, at an empty page, or at an HTTP error status (>= 400), whichever
    comes first. On an error status the response object is printed and whatever was collected is returned.

    Parameters:
        cut_off_date (date): Earliest date to look
        headers (dict): Authentication headers for GH requests

    Returns:
        A list of all the API responses for each commit after the date
    """
    commits_endpoint = "https://api.github.com/repos/openedx/edx-platform/commits?sha=master&per_page=100"
    collected = []
    page_number = 0
    while True:
        page_number += 1
        page_url = f"{commits_endpoint}&page={page_number}"
        print(f"Fetching page {page_url}")
        response = requests.get(page_url, headers=headers)
        if response.status_code >= 400:
            print(response)
            return collected

        page_of_commits = response.json()
        if not page_of_commits:
            # an empty page means there are no more commits to read
            return collected

        for commit_record in page_of_commits:
            raw_date = commit_record['commit']['committer']['date']
            # if present, drop the trailing "Z" so the string parses with fromisoformat
            committed_at = datetime.fromisoformat(raw_date.replace("Z", ""))
            if committed_at < cut_off_date:
                # this relies on the API returning commits newest first: once one commit is too old,
                # nothing further is collected
                return collected
            collected.append(commit_record)


def add_commit_check_suites(current_commit, current_suites, headers):
    """
    Append the API data of every check suite run for a commit to the given list.

    Pages through the commit's check-suites endpoint until a page comes back with an empty check_suites
    list or a response arrives without a check_suites key (that response is printed). Each suite is copied
    with an extra 'commit_date' key holding the commit's committer date.

    Parameters:
        current_commit (dict): commit data from the API; its 'sha' and ['commit']['committer']['date'] are read
        current_suites (list): list to be extended in place
        headers (dict): Authentication headers for connecting to GitHub
    """
    commit_sha = current_commit['sha']
    suites_endpoint = (
        f"https://api.github.com/repos/openedx/edx-platform/commits/{commit_sha}/check-suites?per_page=100"
    )
    page_number = 0
    while True:
        page_number += 1
        paginated_url = f"{suites_endpoint}&page={page_number}"
        print(f"Fetching page {paginated_url}")
        payload = requests.get(paginated_url, headers=headers).json()
        if 'check_suites' not in payload.keys():
            print(payload)
            return

        suites_on_page = payload['check_suites']
        if not suites_on_page:
            # an empty list means the previous page was the last one
            return

        # carry the commit's date along with each suite so it can eventually be written to the spreadsheet
        commit_date = current_commit['commit']['committer']['date']
        for suite in suites_on_page:
            current_suites.append({**suite, 'commit_date': commit_date})


if __name__ == '__main__':
    # get_errors_from_date is a click command, so it is called with no arguments here: click fills in
    # token, start_date and filename from the command-line options (token can also come from GITHUB_TOKEN).
    get_errors_from_date()
Loading