diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..33a925e
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,12 @@
+# .github/dependabot.yml
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "daily"
+
+  - package-ecosystem: "github-actions"
+    directory: "/.github/workflows/"
+    schedule:
+      interval: "daily"
diff --git a/.github/workflows/generate-metrics.yml b/.github/workflows/generate-metrics.yml
new file mode 100644
index 0000000..7023af5
--- /dev/null
+++ b/.github/workflows/generate-metrics.yml
@@ -0,0 +1,68 @@
+name: DORA Metrics Workflow
+
+on:
+  workflow_dispatch:
+    inputs:
+      date_range:
+        description: 'Date range for metrics (format: YYYY-MM-DD..YYYY-MM-DD)'
+        required: false
+      json_file_path:
+        description: 'Path to the JSON file with repository names'
+        required: false
+  schedule:
+    - cron: '0 8 * * 0' # At 08:00 on Sunday
+concurrency:
+  group: generate-metrics
+  cancel-in-progress: false
+permissions: {}
+
+jobs:
+  run_metrics:
+    permissions:
+      issues: write
+      contents: write
+      pull-requests: read
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4.1.1
+
+      - name: Set up Python
+        uses: actions/setup-python@v4.7.0
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+
+      - name: Get current date
+        if: github.event.inputs.date_range == ''
+        id: date
+        run: echo "date=$(date -u +%Y-%m-%d)" >> $GITHUB_ENV
+
+      - name: Calculate date range for the last 7 days
+        if: github.event.inputs.date_range == ''
+        id: last-7-days
+        run: |
+          echo "start_date=$(date -u +%Y-%m-%d --date='7 days ago')" >> $GITHUB_ENV
+
+      - name: Run scripts and create issues
+        env:
+          ACCESS_TOKEN: ${{ secrets.DATA_PLATFORM_ROBOT_PAT }}
+          GH_TOKEN: ${{ github.token }}
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+        run: |
+          for json_file in *.json; do
+            name=$(basename "$json_file" .json)
+            issue_title="📊 DORA Metrics for ${{ env.start_date }}..${{ env.date }} for $name"
+            issue_body=""
+
+            for script in cfr.py df.py ltfc.py mttr.py; do
+              python $script $json_file "${{ env.start_date }}..${{ env.date }}"
+              output=$(cat output.log)
+              rm output.log
+              metric=$(basename "$script" .py | tr '[:lower:]' '[:upper:]')
+              issue_body+="$output"
+            done
+            if [[ "${GITHUB_EVENT_NAME}" == "schedule" ]]; then
+              gh issue create --title "$issue_title" --body "$issue_body"
+            fi
+          done
\ No newline at end of file
diff --git a/analytical-platform.json b/analytical-platform.json
deleted file mode 100644
index 2145593..0000000
--- a/analytical-platform.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-    "repos": [
-        "analytical-platform-iam",
-        "analytics-platform-infrastructure"
-    ]
-}
\ No newline at end of file
diff --git a/cfr.py b/cfr.py
index c401a25..3b414cc 100644
--- a/cfr.py
+++ b/cfr.py
@@ -3,22 +3,37 @@
 import os
 import argparse
 import json
+import logging
 
-OWNER = 'ministryofjustice'
+OWNER = "ministryofjustice"
+
+logger = logging.getLogger('MetricLogger')
+logger.setLevel(logging.INFO)
+
+
+fh = logging.FileHandler('output.log')
+fh.setLevel(logging.INFO)
+
+# Create a formatter and set it on the file handler
+formatter = logging.Formatter('%(message)s')
+fh.setFormatter(formatter)
+logger.addHandler(fh)
 
 # Read ACCESS_TOKEN from environment
-ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
+ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
 # set up the command-line argument parser
 parser = argparse.ArgumentParser()
-parser.add_argument('filename', help='path to the input JSON file')
-parser.add_argument('date_query', help='date range in the format 2023-04-01..2023-05-01')
+parser.add_argument("filename", help="path to the input JSON file")
+parser.add_argument(
+    "date_query", help="date range in the format 2023-04-01..2023-05-01"
+)
 args = parser.parse_args()
 
 # load the repository names from a JSON file
-with open(args.filename, 'r') as f:
-    repos = json.load(f)['repos']
+with open(args.filename, "r") as f:
+    repos = json.load(f)["repos"]
 
 filename, file_extension = os.path.splitext(args.filename)
 
 # Initialize variables
@@ -27,19 +42,30 @@
 runs = []
 per_page = 100
 
 for repo in repos:
-# Define the query parameters to retrieve all workflow runs
-    params = {"branch": "main", "status": "completed", "per_page": per_page, "created": args.date_query}
+    # Define the query parameters to retrieve all workflow runs
+    params = {
+        "branch": "main",
+        "status": "completed",
+        "per_page": per_page,
+        "created": args.date_query,
+    }
 
     # Retrieve the workflow runs for the given repository using the provided query parameters
    workflow_runs = get_workflow_runs(OWNER, repo, ACCESS_TOKEN, params)
     total_workflow_runs += len(workflow_runs)
-    total_unsuccessful_runs += len([run for run in workflow_runs if run['conclusion'] != 'success'])
+    total_unsuccessful_runs += len(
+        [run for run in workflow_runs if run["conclusion"] != "success"]
+    )
 
 # Calculate the percentage of unsuccessful runs
 failure_rate = (total_unsuccessful_runs / total_workflow_runs) * 100
 
 # Output the Change Failure Rate
-print(f'Total Workflow Runs: {total_workflow_runs}')
-print(f'Total Unsuccessful Runs: {total_unsuccessful_runs}')
+logger.info(f"Total Workflow Runs: {total_workflow_runs}")
+logger.info(f"Total Unsuccessful Runs: {total_unsuccessful_runs}")
+logger.info(f"\nChange Failure Rate for {filename}: {failure_rate:.2f}%")
+
+print(f"Total Workflow Runs: {total_workflow_runs}")
+print(f"Total Unsuccessful Runs: {total_unsuccessful_runs}")
 print(f"\033[32m\033[1mChange Failure Rate for {filename}: {failure_rate:.2f}%\033[0m")
diff --git a/data-platform.json b/data-platform.json
index 0dec2ad..fd7cdf4 100644
--- a/data-platform.json
+++ b/data-platform.json
@@ -1,6 +1,5 @@
 {
     "repos": [
-        "data-platform",
-        "data-platform-products"
+        "data-platform"
     ]
 }
\ No newline at end of file
diff --git a/df.py b/df.py
index edc3c14..5770ab7 100644
--- a/df.py
+++ b/df.py
@@ -3,9 +3,22 @@
 import json
 import os
 from github_api import get_workflow_runs
+import logging
 
+OWNER = "ministryofjustice"
 
-OWNER = 'ministryofjustice'
+logger = logging.getLogger('MetricLogger')
+logger.setLevel(logging.INFO)
+
+fh = logging.FileHandler('output.log')
+fh.setLevel(logging.INFO)
+
+# Create a formatter and set it on the file handler
+formatter = logging.Formatter('%(message)s')
+fh.setFormatter(formatter)
+
+# Add the handler to the logger
+logger.addHandler(fh)
 
 
 # Initialize variables
@@ -15,26 +28,33 @@ date_format = "%Y-%m-%dT%H:%M:%SZ"
 
 
 # Read ACCESS_TOKEN from environment
-ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
+ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
 # set up the command-line argument parser
 parser = argparse.ArgumentParser()
-parser.add_argument('filename', help='path to the input JSON file')
-parser.add_argument('date_query', help='date range in the format 2023-04-01..2023-05-01')
+parser.add_argument("filename", help="path to the input JSON file")
+parser.add_argument(
+    "date_query", help="date range in the format 2023-04-01..2023-05-01"
+)
 args = parser.parse_args()
 
 filename, file_extension = os.path.splitext(args.filename)
 
 # load the repository names from a JSON file
-with open(args.filename, 'r') as f:
-    repos = json.load(f)['repos']
+with open(args.filename, "r") as f:
+    repos = json.load(f)["repos"]
 
 
 num_successful_runs = 0
 for repo in repos:
-    params = {"branch": "main", "status": "success", "per_page": per_page, "created": args.date_query}
+    params = {
+        "branch": "main",
+        "status": "success",
+        "per_page": per_page,
+        "created": args.date_query,
+    }
     try:
-        runs += get_workflow_runs(OWNER,repo, ACCESS_TOKEN,params)
+        runs += get_workflow_runs(OWNER, repo, ACCESS_TOKEN, params)
         # Count the number of successful runs
     except Exception as e:
         # Log message if there's a problem retrieving the workflow runs
@@ -56,5 +76,7 @@
 
 if deployment_frequency is not None:
     print(f"\033[1m\033[32mDaily deployment frequency for {filename}: {deployment_frequency:.2f} deployments/day\033[0m")
+    logger.info(f"\nDaily deployment frequency for {filename}: {deployment_frequency:.2f} deployments/day")
 else:
     print(f"\033[1m\033[32m{filename} does not use github actions for deployments\033[0m")
+    logger.info(f"{filename} does not use github actions for deployments")
diff --git a/github_api.py b/github_api.py
index c27f904..a6e451a 100644
--- a/github_api.py
+++ b/github_api.py
@@ -1,17 +1,18 @@
 import requests
 from urllib.parse import urlparse, parse_qs
 
+
 def get_workflow_runs(owner, repo, token, params):
     """Retrieves all workflow runs for a given repository using the provided query parameters."""
 
     # Set the necessary authentication headers using the personal access token (PAT)
     headers = {
-        'Authorization': f'Bearer {token}',
-        'Accept': 'application/vnd.github.v3+json'
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github.v3+json",
     }
 
     # Define the API endpoint to retrieve the workflow runs for the given repository
-    url = f'https://api.github.com/repos/{owner}/{repo}/actions/runs'
+    url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
 
     # Retrieve all workflow runs for the given repository using the updated query parameters
     workflow_runs = []
@@ -21,19 +22,19 @@ def get_workflow_runs(owner, repo, token, params):
             response.raise_for_status()
             response_json = response.json()
 
-            workflow_runs.extend(response_json['workflow_runs'])
+            workflow_runs.extend(response_json["workflow_runs"])
 
-            if 'next' in response.links:
-                next_link = response.links['next']['url']
+            if "next" in response.links:
+                next_link = response.links["next"]["url"]
                 # Get the value of the `page` parameter from the `next` link, if it exists
                 url_parts = urlparse(next_link)
-                page = parse_qs(url_parts.query).get('page', None)
+                page = parse_qs(url_parts.query).get("page", None)
                 if page is not None:
                     page = int(page[0])
                     # Update the `page` parameter in the `params` dictionary to retrieve the next page of results
-                    params['page'] = page
+                    params["page"] = page
                     # remove the query string from the URL as we don't need to specify the completed_at parameter in subsequent requests
-                    url = next_link.split('?')[0]
+                    url = next_link.split("?")[0]
             else:
                 break
 
@@ -43,17 +44,18 @@
         # Raise an error if there's a problem retrieving the workflow runs
         raise ValueError(f"Error retrieving workflow runs: {e}")
 
+
 def get_merged_pull_requests(owner, repo, token, params):
     """Retrieves all merged pull requests for a given repository using the provided query parameters."""
 
     # Set the necessary authentication headers using the personal access token (PAT)
     headers = {
-        'Authorization': f'Bearer {token}',
-        'Accept': 'application/vnd.github.v3+json'
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github.v3+json",
     }
 
     # Define the API endpoint to retrieve the merged pull requests for the given repository
-    url = f'https://api.github.com/repos/{owner}/{repo}/pulls'
+    url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
 
     # Retrieve all merged pull requests for the given repository using the updated query parameters
     merged_pull_requests = []
@@ -65,20 +67,20 @@ def get_merged_pull_requests(owner, repo, token, params):
 
             # Filter only the merged pull requests from the list of closed pull requests
             for pull_request in response_json:
-                if pull_request['merged_at'] is not None:
+                if pull_request["merged_at"] is not None:
                     merged_pull_requests.append(pull_request)
 
-            if 'next' in response.links:
-                next_link = response.links['next']['url']
+            if "next" in response.links:
+                next_link = response.links["next"]["url"]
                 # Get the value of the `page` parameter from the `next` link, if it exists
                 url_parts = urlparse(next_link)
-                page = parse_qs(url_parts.query).get('page', None)
+                page = parse_qs(url_parts.query).get("page", None)
                 if page is not None:
                     page = int(page[0])
                     # Update the `page` parameter in the `params` dictionary to retrieve the next page of results
-                    params['page'] = page
+                    params["page"] = page
                     # remove the query string from the URL as we don't need to specify the completed_at parameter in subsequent requests
-                    url = next_link.split('?')[0]
+                    url = next_link.split("?")[0]
             else:
                 break
 
@@ -88,11 +90,12 @@ def get_merged_pull_requests(owner, repo, token, params):
         # Raise an error if there's a problem retrieving the pull requests
         raise ValueError(f"Error retrieving pull requests: {e}")
 
-def make_github_api_call(url,token):
+
+def make_github_api_call(url, token):
     # Set the necessary authentication headers using the personal access token (PAT)
     headers = {
-        'Authorization': f'Bearer {token}',
-        'Accept': 'application/vnd.github.v3+json'
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github.v3+json",
     }
     try:
         response = requests.get(url, headers=headers)
diff --git a/ltfc.py b/ltfc.py
index 36e1f4c..2ae24fa 100644
--- a/ltfc.py
+++ b/ltfc.py
@@ -3,21 +3,37 @@
 import json
 import os
 import argparse
-import pprint
+import logging
 
-OWNER = 'ministryofjustice'
+
+OWNER = "ministryofjustice"
+
+logger = logging.getLogger('MetricLogger')
+logger.setLevel(logging.INFO)
+
+fh = logging.FileHandler('output.log')
+fh.setLevel(logging.INFO)
+
+# Create a formatter and set it on the file handler
+formatter = logging.Formatter('%(message)s')
+fh.setFormatter(formatter)
+
+# Add the handler to the logger
+logger.addHandler(fh)
 
 # Initialize variables
 merged_pull_requests = []
 per_page = 100
 
 # Read ACCESS_TOKEN from environment
-ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
+ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
 # set up the command-line argument parser
 parser = argparse.ArgumentParser()
-parser.add_argument('filename', help='path to the input JSON file')
-parser.add_argument('date_query', help='date range in the format 2023-04-01..2023-05-01')
+parser.add_argument("filename", help="path to the input JSON file")
+parser.add_argument(
+    "date_query", help="date range in the format 2023-04-01..2023-05-01"
+)
 args = parser.parse_args()
 
 filename, file_extension = os.path.splitext(args.filename)
@@ -26,16 +42,24 @@ team_lead_time = timedelta()
 date_query = args.date_query
 date_range = date_query.split("..")
-start_date = datetime.strptime(date_range[0], '%Y-%m-%d')
-end_date = datetime.strptime(date_range[-1], '%Y-%m-%d')
+start_date = datetime.strptime(date_range[0], "%Y-%m-%d")
+end_date = datetime.strptime(date_range[-1], "%Y-%m-%d")
 
 # load the repository names from a JSON file
-with open(args.filename, 'r') as f:
-    repos = json.load(f)['repos']
+with open(args.filename, "r") as f:
+    repos = json.load(f)["repos"]
 
 for repo in repos:
-    params = {"state": "closed", "sort": "updated", "direction": "desc", "per_page": per_page, "base": "main"}
+    params = {
+        "state": "closed",
+        "sort": "updated",
+        "direction": "desc",
+        "per_page": per_page,
+        "base": "main",
+    }
     try:
-        merged_pull_requests = get_merged_pull_requests(OWNER, repo, ACCESS_TOKEN, params)
+        merged_pull_requests = get_merged_pull_requests(
+            OWNER, repo, ACCESS_TOKEN, params
+        )
     except Exception as e:
         # Log message if there's a problem retrieving pull requests
         print(f"Error retrieving pull requests: {e}")
@@ -63,8 +87,12 @@
         # commit_date = datetime.fromisoformat(commit["commit"]["committer"]["date"][:-1])
         # print(f"Commit date: {commit_date}")
         if len(commits) > 0:
-            last_commit = commits[-1] # change to commits[0] to get first rather than commit.
-            commit_time = datetime.fromisoformat(last_commit["commit"]["committer"]["date"][:-1])
+            last_commit = commits[
+                -1
+            ]  # change to commits[0] to get first rather than commit.
+            commit_time = datetime.fromisoformat(
+                last_commit["commit"]["committer"]["date"][:-1]
+            )
             # print(f"Commit date: {commit_time}")
         else:
             commit_time = merged_at
@@ -82,6 +110,21 @@
 
 if team_merged_pull_requests > 0:
     mean_lead_time = team_lead_time / team_merged_pull_requests
-    print(f"\033[32m\033[1mMean lead time for {filename} team over {team_merged_pull_requests} merged pull requests: {mean_lead_time.days} days, {mean_lead_time.seconds // 3600} hours, {(mean_lead_time.seconds % 3600) // 60} minutes\033[0m")
+    message = (
+        f"\033[32m\033[1mMean lead time for {filename} team over "
+        f"{team_merged_pull_requests} merged pull requests: {mean_lead_time.days} days, "
+        f"{mean_lead_time.seconds // 3600} hours, "
+        f"{(mean_lead_time.seconds % 3600) // 60} minutes\033[0m"
+    )
+    print(message)
+    message = (
+        f"\nMean lead time for {filename} team over "
+        f"{team_merged_pull_requests} merged pull requests: {mean_lead_time.days} days, "
+        f"{mean_lead_time.seconds // 3600} hours, "
+        f"{(mean_lead_time.seconds % 3600) // 60} minutes"
+    )
+    logger.info(message)
+
 else:
     print("No merged pull requests found.")
+    logger.info("No merged pull requests found.")
diff --git a/mttr.py b/mttr.py
index 38545b6..471b32c 100644
--- a/mttr.py
+++ b/mttr.py
@@ -1,24 +1,41 @@
 from datetime import datetime, timedelta
 from github_api import get_workflow_runs
 import json
-import pprint
 import argparse
 import os
 from collections import defaultdict
+import logging
+
 
 # replace with your personal access token and repo information
-OWNER = 'ministryofjustice'
+OWNER = "ministryofjustice"
+
+logger = logging.getLogger('MetricLogger')
+logger.setLevel(logging.INFO)
+
+fh = logging.FileHandler('output.log')
+fh.setLevel(logging.INFO)
+
+# Create a formatter and set it on the file handler
+formatter = logging.Formatter('%(message)s')
+fh.setFormatter(formatter)
+
+# Add the handler to the logger
+logger.addHandler(fh)
+
 
 workflow_periods = defaultdict(list)
 workflow_stacks = defaultdict(list)
 
 # Read ACCESS_TOKEN from environment
-ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
+ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
 # set up the command-line argument parser
 parser = argparse.ArgumentParser()
-parser.add_argument('filename', help='path to the input JSON file')
-parser.add_argument('date_query', help='date range in the format 2023-04-01..2023-05-01')
+parser.add_argument("filename", help="path to the input JSON file")
+parser.add_argument(
+    "date_query", help="date range in the format 2023-04-01..2023-05-01"
+)
 args = parser.parse_args()
 
 
@@ -27,19 +44,18 @@
 per_page = 100
 
 # load the repository names from a JSON file
-with open(args.filename, 'r') as f:
-    repos = json.load(f)['repos']
+with open(args.filename, "r") as f:
+    repos = json.load(f)["repos"]
 
 filename, file_extension = os.path.splitext(args.filename)
 
 # loop over each repository
 for repo in repos:
-
     # Get all workflow runs on the main branch
     params = {"branch": "main", "per_page": per_page, "created": args.date_query}
 
     try:
-        repo_run = get_workflow_runs(OWNER,repo, ACCESS_TOKEN,params)
+        repo_run = get_workflow_runs(OWNER, repo, ACCESS_TOKEN, params)
 
         print(f"Retrieved {len(repo_run)} workflow runs for {OWNER}/{repo}")
         runs += repo_run
@@ -50,47 +66,64 @@
 print(f"Retrieved {len(runs)} workflow runs in total")
 
 # sort the workflow runs by created_at in ascending order
-runs = sorted(runs, key=lambda run: datetime.fromisoformat(run['created_at'].replace('Z', '')))
+runs = sorted(
+    runs, key=lambda run: datetime.fromisoformat(run["created_at"].replace("Z", ""))
+)
 
 # filter the unsuccessful runs
-unsuccessful_runs = [run for run in runs if run['conclusion'] != 'success']
+unsuccessful_runs = [run for run in runs if run["conclusion"] != "success"]
 
 # find the periods between the first unsuccessful run and the first subsequent successful run for each workflow
 for run in runs:
-    workflow_id = run['workflow_id']
-    workflow_name = run['name']
+    workflow_id = run["workflow_id"]
+    workflow_name = run["name"]
 
     if workflow_name == "Terraform Static Code Analysis":
         continue
-    timestamp = datetime.fromisoformat(run['created_at'].replace('Z', ''))
+    timestamp = datetime.fromisoformat(run["created_at"].replace("Z", ""))
 
-    if run['conclusion'] != 'success':
+    if run["conclusion"] != "success":
         if not workflow_stacks[workflow_id]:
             workflow_stacks[workflow_id].append(timestamp)
             print(f"Found new failure for workflow '{workflow_name}' at {timestamp}")
     else:
         if workflow_stacks[workflow_id]:
             start = workflow_stacks[workflow_id].pop()
-            period = {'start': start, 'end': timestamp}
+            period = {"start": start, "end": timestamp}
             workflow_periods[workflow_id].append(period)
             print(f"Found new success for workflow '{workflow_name}' at {timestamp}")
         workflow_stacks[workflow_id] = []
 
 # calculate the time to recovery for each workflow
-workflow_recovery_times = {workflow_id: [period['end'] - period['start'] for period in periods if period['end']]
-                           for workflow_id, periods in workflow_periods.items()}
+workflow_recovery_times = {
+    workflow_id: [
+        period["end"] - period["start"] for period in periods if period["end"]
+    ]
+    for workflow_id, periods in workflow_periods.items()
+}
 # print("### Workflow Recovery Dict ###")
 # pprint.pprint(workflow_recovery_times)
 
 total_workflows = sum(len(periods) for periods in workflow_periods.values())
 print(f"Total Workflows: {total_workflows}")
 
-total_recovery_time = sum((time_to_recovery for workflow_times in workflow_recovery_times.values() for time_to_recovery in workflow_times), timedelta(0))
-mean_time_to_recovery = total_recovery_time / total_workflows if total_workflows > 0 else None
+total_recovery_time = sum(
+    (
+        time_to_recovery
+        for workflow_times in workflow_recovery_times.values()
+        for time_to_recovery in workflow_times
+    ),
+    timedelta(0),
+)
+mean_time_to_recovery = (
+    total_recovery_time / total_workflows if total_workflows > 0 else None
+)
 if mean_time_to_recovery is not None:
     days, seconds = mean_time_to_recovery.days, mean_time_to_recovery.seconds
     hours = seconds // 3600
     minutes = (seconds % 3600) // 60
     print(f"\033[32m\033[1mMean time to recovery for {filename}: {days} days, {hours} hours, {minutes} minutes\033[0m")
+    logger.info(f"\nMean time to recovery for {filename}: {days} days, {hours} hours, {minutes} minutes")
 else:
-    print("No unsuccessful workflow runs found in the last 90 days.")
\ No newline at end of file
+    print("No unsuccessful workflow runs found in the last 90 days.")
+    logger.info("No unsuccessful workflow runs found in the last 90 days.")