Adding a scheduled workflow + new Data Platform config #17

Merged · 2 commits · Nov 3, 2023
12 changes: 12 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,12 @@
+# .github/dependabot.yml
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "daily"
+
+  - package-ecosystem: "github-actions"
+    directory: "/.github/workflows/"
+    schedule:
+      interval: "daily"
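
With both ecosystems checked daily, this config can open a steady stream of update PRs. A minimal sketch of one optional extension, assuming the team later wants to cap the noise; the `open-pull-requests-limit` key is a standard Dependabot option, not part of this PR:

# Hypothetical variant of the pip block above; not part of this PR.
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "daily"
    open-pull-requests-limit: 3  # lower Dependabot's default cap of 5 concurrent PRs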
68 changes: 68 additions & 0 deletions .github/workflows/generate-metrics.yml
@@ -0,0 +1,68 @@
+name: DORA Metrics Workflow
+
+on:
+  workflow_dispatch:
+    inputs:
+      date_range:
+        description: 'Date range for metrics (format: YYYY-MM-DD..YYYY-MM-DD)'
+        required: false
+      json_file_path:
+        description: 'Path to the JSON file with repository names'
+        required: false
+  schedule:
+    - cron: '0 8 * * 0' # At 08:00 on Sunday
+concurrency:
+  group: generate-metrics
+  cancel-in-progress: false
+permissions: {}
+
+jobs:
+  run_metrics:
+    permissions:
+      issues: write
+      contents: write
+      pull-requests: read
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/[email protected]
+
+      - name: Set up Python
+        uses: actions/[email protected]
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+
+      - name: Get current date
+        if: github.event.inputs.date_range == ''
+        id: date
+        run: echo "date=$(date -u +%Y-%m-%d)" >> $GITHUB_ENV
+
+      - name: Calculate date range for the last 7 days
+        if: github.event.inputs.date_range == ''
+        id: last-7-days
+        run: |
+          echo "start_date=$(date -u +%Y-%m-%d --date='7 days ago')" >> $GITHUB_ENV
+
+      - name: Run scripts and create issues
+        env:
+          ACCESS_TOKEN: ${{ secrets.DATA_PLATFORM_ROBOT_PAT }}
+          GH_TOKEN: ${{ github.token }}
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+        run: |
+          for json_file in *.json; do
+            name=$(basename "$json_file" .json)
+            issue_title="📊 DORA Metrics for ${{ env.start_date }}..${{ env.date }} for $name"
+            issue_body=""
+
+            for script in cfr.py df.py ltfc.py mttr.py; do
+              python $script $json_file "${{ env.start_date }}..${{ env.date }}"
+              output=$(cat output.log)
+              rm output.log
+              metric=$(basename "$script" .py | tr '[:lower:]' '[:upper:]')
+              issue_body+="$output"
+            done
+            if [[ "${GITHUB_EVENT_NAME}" == "schedule" ]]; then
+              gh issue create --title "$issue_title" --body "$issue_body"
+            fi
+          done
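
On the scheduled path, the two date steps build the `start..end` range that is interpolated into each script call. A quick local sketch of the same computation (GNU `date`, as on `ubuntu-latest`; the JSON file name is just an example):

# Build the 7-day range exactly as the workflow does
start_date=$(date -u +%Y-%m-%d --date='7 days ago')
end_date=$(date -u +%Y-%m-%d)
echo "${start_date}..${end_date}"   # e.g. 2023-10-27..2023-11-03

# Each metric script then receives the JSON file and the range,
# with ACCESS_TOKEN set in the environment:
ACCESS_TOKEN=<redacted> python cfr.py data-platform.json "${start_date}..${end_date}"

Note that the loop also invokes ltfc.py and mttr.py, which this PR does not touch.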
6 changes: 0 additions & 6 deletions analytical-platform.json

This file was deleted.

48 changes: 37 additions & 11 deletions cfr.py
@@ -3,22 +3,37 @@
 import os
 import argparse
 import json
+import logging
 
-OWNER = 'ministryofjustice'
+OWNER = "ministryofjustice"
+
+logger = logging.getLogger('MetricLogger')
+logger.setLevel(logging.INFO)
+
+
+fh = logging.FileHandler('output.log')
+fh.setLevel(logging.INFO)
+
+# Create formatter and set it for both handlers
+formatter = logging.Formatter('%(message)s')
+fh.setFormatter(formatter)
+logger.addHandler(fh)
+
 
 # Read ACCESS_TOKEN from environment
-ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
+ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
 # set up the command-line argument parser
 parser = argparse.ArgumentParser()
-parser.add_argument('filename', help='path to the input JSON file')
-parser.add_argument('date_query', help='date range in the format 2023-04-01..2023-05-01')
+parser.add_argument("filename", help="path to the input JSON file")
+parser.add_argument(
+    "date_query", help="date range in the format 2023-04-01..2023-05-01"
+)
 args = parser.parse_args()
 
 # load the repository names from a JSON file
-with open(args.filename, 'r') as f:
-    repos = json.load(f)['repos']
+with open(args.filename, "r") as f:
+    repos = json.load(f)["repos"]
 
 filename, file_extension = os.path.splitext(args.filename)
 # Initialize variables
@@ -27,19 +42,30 @@
 runs = []
 per_page = 100
 for repo in repos:
-    # Define the query parameters to retrieve all workflow runs
-    params = {"branch": "main", "status": "completed", "per_page": per_page, "created": args.date_query}
+    # Define the query parameters to retrieve all workflow runs
+    params = {
+        "branch": "main",
+        "status": "completed",
+        "per_page": per_page,
+        "created": args.date_query,
+    }
 
     # Retrieve the workflow runs for the given repository using the provided query parameters
     workflow_runs = get_workflow_runs(OWNER, repo, ACCESS_TOKEN, params)
     total_workflow_runs += len(workflow_runs)
-    total_unsuccessful_runs += len([run for run in workflow_runs if run['conclusion'] != 'success'])
+    total_unsuccessful_runs += len(
+        [run for run in workflow_runs if run["conclusion"] != "success"]
+    )
 
 
 # Calculate the percentage of unsuccessful runs
 failure_rate = (total_unsuccessful_runs / total_workflow_runs) * 100
 
 # Output the Change Failure Rate
-print(f'Total Workflow Runs: {total_workflow_runs}')
-print(f'Total Unsuccessful Runs: {total_unsuccessful_runs}')
+logger.info(f"Total Workflow Runs: {total_workflow_runs}")
+logger.info(f"Total Unsuccessful Runs: {total_unsuccessful_runs}")
+logger.info(f"\nChange Failure Rate for {filename}: {failure_rate:.2f}%")
+
+print(f"Total Workflow Runs: {total_workflow_runs}")
+print(f"Total Unsuccessful Runs: {total_unsuccessful_runs}")
 print(f"\033[32m\033[1mChange Failure Rate for {filename}: {failure_rate:.2f}%\033[0m")
3 changes: 1 addition & 2 deletions data-platform.json
@@ -1,6 +1,5 @@
 {
   "repos": [
-    "data-platform",
-    "data-platform-products"
+    "data-platform"
   ]
 }
38 changes: 30 additions & 8 deletions df.py
@@ -3,9 +3,22 @@
 import json
 import os
 from github_api import get_workflow_runs
+import logging
+
+OWNER = "ministryofjustice"
 
-OWNER = 'ministryofjustice'
+logger = logging.getLogger('MetricLogger')
+logger.setLevel(logging.INFO)
+
+fh = logging.FileHandler('output.log')
+fh.setLevel(logging.INFO)
+
+# Create formatter and set it for both handlers
+formatter = logging.Formatter('%(message)s')
+fh.setFormatter(formatter)
+
+# Add the handlers to the logger
+logger.addHandler(fh)
 
 
 # Initialize variables
@@ -15,26 +28,33 @@
 date_format = "%Y-%m-%dT%H:%M:%SZ"
 
 # Read ACCESS_TOKEN from environment
-ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
+ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
 # set up the command-line argument parser
 parser = argparse.ArgumentParser()
-parser.add_argument('filename', help='path to the input JSON file')
-parser.add_argument('date_query', help='date range in the format 2023-04-01..2023-05-01')
+parser.add_argument("filename", help="path to the input JSON file")
+parser.add_argument(
+    "date_query", help="date range in the format 2023-04-01..2023-05-01"
+)
 args = parser.parse_args()
 
 filename, file_extension = os.path.splitext(args.filename)
 
 # load the repository names from a JSON file
-with open(args.filename, 'r') as f:
-    repos = json.load(f)['repos']
+with open(args.filename, "r") as f:
+    repos = json.load(f)["repos"]
 
 num_successful_runs = 0
 
 for repo in repos:
-    params = {"branch": "main", "status": "success", "per_page": per_page, "created": args.date_query}
+    params = {
+        "branch": "main",
+        "status": "success",
+        "per_page": per_page,
+        "created": args.date_query,
+    }
     try:
-        runs += get_workflow_runs(OWNER,repo, ACCESS_TOKEN,params)
+        runs += get_workflow_runs(OWNER, repo, ACCESS_TOKEN, params)
         # Count the number of successful runs
     except Exception as e:
         # Log message if there's a problem retrieving the workflow runs
@@ -56,5 +76,7 @@
 
 if deployment_frequency is not None:
     print(f"\033[1m\033[32mDaily deployment frequency for {filename}: {deployment_frequency:.2f} deployments/day\033[0m")
+    logger.info(f"\nDaily deployment frequency for {filename}: {deployment_frequency:.2f} deployments/day")
 else:
     print(f"\033[1m\033[32m{filename} does not use github actions for deployments\033[0m")
+    logger.info(f"{filename} does not use github actions for deployments")
45 changes: 24 additions & 21 deletions github_api.py
@@ -1,17 +1,18 @@
 import requests
 from urllib.parse import urlparse, parse_qs
 
+
 def get_workflow_runs(owner, repo, token, params):
     """Retrieves all workflow runs for a given repository using the provided query parameters."""
 
     # Set the necessary authentication headers using the personal access token (PAT)
     headers = {
-        'Authorization': f'Bearer {token}',
-        'Accept': 'application/vnd.github.v3+json'
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github.v3+json",
     }
 
     # Define the API endpoint to retrieve the workflow runs for the given repository
-    url = f'https://api.github.com/repos/{owner}/{repo}/actions/runs'
+    url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
 
     # Retrieve all workflow runs for the given repository using the updated query parameters
     workflow_runs = []
@@ -21,19 +22,19 @@ def get_workflow_runs(owner, repo, token, params):
             response.raise_for_status()
             response_json = response.json()
 
-            workflow_runs.extend(response_json['workflow_runs'])
+            workflow_runs.extend(response_json["workflow_runs"])
 
-            if 'next' in response.links:
-                next_link = response.links['next']['url']
+            if "next" in response.links:
+                next_link = response.links["next"]["url"]
                 # Get the value of the `page` parameter from the `next` link, if it exists
                 url_parts = urlparse(next_link)
-                page = parse_qs(url_parts.query).get('page', None)
+                page = parse_qs(url_parts.query).get("page", None)
                 if page is not None:
                     page = int(page[0])
                     # Update the `page` parameter in the `params` dictionary to retrieve the next page of results
-                    params['page'] = page
+                    params["page"] = page
                     # remove the query string from the URL as we don't need to specify the completed_at parameter in subsequent requests
-                    url = next_link.split('?')[0]
+                    url = next_link.split("?")[0]
                 else:
                     break
 
@@ -43,17 +44,18 @@ def get_workflow_runs(owner, repo, token, params):
         # Raise an error if there's a problem retrieving the workflow runs
         raise ValueError(f"Error retrieving workflow runs: {e}")
 
+
 def get_merged_pull_requests(owner, repo, token, params):
     """Retrieves all merged pull requests for a given repository using the provided query parameters."""
 
     # Set the necessary authentication headers using the personal access token (PAT)
     headers = {
-        'Authorization': f'Bearer {token}',
-        'Accept': 'application/vnd.github.v3+json'
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github.v3+json",
     }
 
     # Define the API endpoint to retrieve the merged pull requests for the given repository
-    url = f'https://api.github.com/repos/{owner}/{repo}/pulls'
+    url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
 
     # Retrieve all merged pull requests for the given repository using the updated query parameters
     merged_pull_requests = []
@@ -65,20 +67,20 @@ def get_merged_pull_requests(owner, repo, token, params):
 
             # Filter only the merged pull requests from the list of closed pull requests
             for pull_request in response_json:
-                if pull_request['merged_at'] is not None:
+                if pull_request["merged_at"] is not None:
                     merged_pull_requests.append(pull_request)
 
-            if 'next' in response.links:
-                next_link = response.links['next']['url']
+            if "next" in response.links:
+                next_link = response.links["next"]["url"]
                 # Get the value of the `page` parameter from the `next` link, if it exists
                 url_parts = urlparse(next_link)
-                page = parse_qs(url_parts.query).get('page', None)
+                page = parse_qs(url_parts.query).get("page", None)
                 if page is not None:
                     page = int(page[0])
                     # Update the `page` parameter in the `params` dictionary to retrieve the next page of results
-                    params['page'] = page
+                    params["page"] = page
                     # remove the query string from the URL as we don't need to specify the completed_at parameter in subsequent requests
-                    url = next_link.split('?')[0]
+                    url = next_link.split("?")[0]
                 else:
                     break
 
@@ -88,11 +90,12 @@ def get_merged_pull_requests(owner, repo, token, params):
         # Raise an error if there's a problem retrieving the pull requests
         raise ValueError(f"Error retrieving pull requests: {e}")
 
-def make_github_api_call(url,token):
+
+def make_github_api_call(url, token):
     # Set the necessary authentication headers using the personal access token (PAT)
     headers = {
-        'Authorization': f'Bearer {token}',
-        'Accept': 'application/vnd.github.v3+json'
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github.v3+json",
     }
     try:
         response = requests.get(url, headers=headers)
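
For reference, the metric scripts above call get_workflow_runs like this; a sketch assembled from the diffs, with a hypothetical date range, and assuming requirements.txt pins requests:

import os
from github_api import get_workflow_runs

ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
params = {
    "branch": "main",
    "status": "completed",
    "per_page": 100,
    "created": "2023-10-27..2023-11-03",  # hypothetical range
}

# Pagination via the Link header is handled inside get_workflow_runs
runs = get_workflow_runs("ministryofjustice", "data-platform", ACCESS_TOKEN, params)
print(f"{len(runs)} completed runs on main in range")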