-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_repo_commits.py
175 lines (144 loc) · 5.05 KB
/
get_repo_commits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python3
"""
The functions in this file work together to collect a calendar year's
worth of commit information and store it in a JSON file.
"""
import requests
import json
from datetime import datetime
import os
def get_commit_info(commit):
    """
    Reduce one commit entry to the two fields this module records.

    Args:
        commit (dict): A single commit entry; it is read at
            commit['commit']['author']['date'] and
            commit['commit']['message'].

    Returns:
        dict: A dictionary with the keys 'date' and 'message'.
    """
    details = commit['commit']
    authored_on = details['author']['date']
    text = details['message']
    return {'date': authored_on, 'message': text}
def get_commit_metrics(
    username,
    repo_name,
    access_token,
    start_date,
    end_date,
    timeout=30
):
    """
    Retrieves commit metrics (earliest date, latest date, total commits, commit
    messages) for a given repository within a specified date range using GitHub
    API.

    Pages through the repository's commits endpoint, 100 commits at a time,
    until an empty page is returned.

    Args:
        username (str): GitHub username. Used both as the repository owner in
            the URL and as the 'author' filter.
        repo_name (str): Repository name.
        access_token (str): Token sent as a Bearer credential in the
            Authorization header.
        start_date (datetime): Start date for commit search.
        end_date (datetime): End date for commit search.
        timeout (float or tuple): Seconds to wait for each HTTP request before
            giving up. Defaults to 30.

    Returns:
        tuple: A tuple containing earliest date, latest date, total commits,
        and commit messages. When no commits match, the result is
        (None, None, 0, []). When any error occurs (HTTP error status,
        timeout, connection failure, unexpected payload), a message is printed
        and the result is (None, None, None, None).
    """
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Accept': 'application/vnd.github.v3+json'
    }
    base_url = f'https://api.github.com/repos/{username}/{repo_name}/commits'
    params = {
        'author': username,
        'since': start_date.isoformat(),
        'until': end_date.isoformat(),
        'page': 1,
        'per_page': 100  # GitHub API max per_page value
    }
    earliest_date = None
    latest_date = None
    total_commits = 0
    commit_messages = []
    try:
        while True:
            # Without a timeout a stalled connection would block forever.
            # A timeout raises a requests exception that the generic handler
            # below turns into the usual all-None result.
            response = requests.get(
                base_url,
                params=params,
                headers=headers,
                timeout=timeout,
            )
            # Print message to console; '\r' keeps the progress on one line
            print("Retrieving: " + str(repo_name) +
                  "\t\tPage: " + str(params['page']), end="\r", flush=True)
            response.raise_for_status()
            commits = response.json()
            # An empty page means every commit has been consumed
            if not commits:
                break
            total_commits += len(commits)
            for commit in commits:
                commit_info = get_commit_info(commit)
                commit_messages.append(commit_info)
                commit_date = datetime.strptime(
                    commit_info['date'], '%Y-%m-%dT%H:%M:%SZ'
                )
                if earliest_date is None or commit_date < earliest_date:
                    earliest_date = commit_date
                if latest_date is None or commit_date > latest_date:
                    latest_date = commit_date
            params['page'] += 1
    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
        return None, None, None, None
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on the all-None tuple
        # rather than an exception to detect a failed repository.
        print(f"An error occurred: {e}")
        return None, None, None, None
    return earliest_date, latest_date, total_commits, commit_messages
def get_commit_data_for_repos(
    username,
    repo_names,
    access_token,
    start_date,
    end_date
):
    """
    Collects the commit summary of every repository in a list.

    Each repository is queried through get_commit_metrics. Repositories for
    which no earliest/latest date or no commit count came back are left out
    of the result.

    Args:
        username (str): GitHub username.
        repo_names (list): List of repository names.
        access_token (str): Token forwarded to get_commit_metrics.
        start_date (datetime object): Start date for query
        end_date (datetime object): End date for query

    Returns:
        dict: Maps each included repository name to a dictionary with the
        keys 'earliest_date', 'latest_date', 'total_commits' and
        'commit_messages'.
    """
    summaries = {}
    for name in repo_names:
        metrics = get_commit_metrics(
            username, name, access_token, start_date, end_date
        )
        first_seen, last_seen, count, messages = metrics
        # Skip repositories that produced nothing usable
        if not first_seen or not last_seen or count is None:
            continue
        summary = {}
        summary['earliest_date'] = first_seen.isoformat()
        summary['latest_date'] = last_seen.isoformat()
        summary['total_commits'] = count
        summary['commit_messages'] = messages
        summaries[name] = summary
    return summaries
def create_commit_data_json(username, commit_data_per_repo, output_file):
    """
    Creates a JSON file containing commit data for each repository.

    The written document has two top-level keys: 'username' and
    'commit_data_per_repo'. Any missing parent directory of output_file is
    created first.

    Args:
        username (str): GitHub username.
        commit_data_per_repo (dict): Dictionary containing commit data for each
            repository.
        output_file (str): Path to the output JSON file.

    Raises:
        OSError: If a directory cannot be created or the file cannot be
            written.
        TypeError: If commit_data_per_repo holds values json cannot serialize.
    """
    data = {
        'username': username,
        'commit_data_per_repo': commit_data_per_repo
    }
    # The default 'json_files' directory is still always created so existing
    # callers that rely on that side effect keep working. exist_ok avoids the
    # race between an exists() check and makedirs().
    os.makedirs('json_files', exist_ok=True)
    # The file is written to output_file, so its own parent directory is the
    # one that must exist; an empty dirname means the current directory.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)