-
Notifications
You must be signed in to change notification settings - Fork 1
/
migrate.py
executable file
·262 lines (212 loc) · 10.1 KB
/
migrate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/usr/bin/env python3
import re
import github
import git_filter_repo as fr
import click
from github.Repository import Repository
import tempfile
import os
import subprocess
import pathlib
import urllib3
import datetime
import requests
import fnmatch
def run(args, wd=None):
    """Execute a command and hand back its trimmed standard output.

    Args:
        args: The command and its arguments, as a sequence.
        wd: Directory to run the command in; the current working directory
            is used when this is falsy.

    Returns:
        The command's stdout, decoded and stripped of surrounding whitespace.

    Raises:
        Exception: If the command exits with a non-zero status. The message
            carries the decoded stdout and stderr.
    """
    working_dir = pathlib.Path(wd or os.getcwd()).resolve()
    completed = subprocess.run(
        args,
        cwd=working_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout_text = completed.stdout.decode()
    if completed.returncode == 0:
        return stdout_text.strip()
    raise Exception(f'error running {args[0]}: {stdout_text} {completed.stderr.decode()}')
class RateLimitRetry(urllib3.util.retry.Retry):
    """A urllib3 retry implementation for retrying rate-limited requests from the GitHub API."""

    def get_retry_after(self, response):
        """Return how many seconds to wait before retrying `response`.

        The wait is derived from the `X-RateLimit-Reset` header, which is
        parsed as an integer epoch timestamp. When the header is absent the
        decision is delegated to the base `Retry` implementation.
        """
        reset_header = response.headers.get("X-RateLimit-Reset")
        if reset_header is None:
            # Not a response carrying rate-limit info; use the default handling
            return super().get_retry_after(response)
        reset_time = datetime.datetime.fromtimestamp(int(reset_header))
        # total_seconds() keeps the sign, unlike timedelta.seconds which wraps
        # a negative delta (reset already passed) to almost a full day
        remaining = (reset_time - datetime.datetime.now()).total_seconds()
        retry_after = max(0, int(remaining)) + 1
        print(f"Rate limited, retrying after {retry_after} seconds")
        return retry_after
def transfer_issue(auth_token, issue_id, dest_repo_id):
    """Transfer an issue to a new repository using the GitHub GraphQL API.

    Args:
        auth_token: Token sent as the `Authorization: Bearer` header.
        issue_id: Node ID of the issue to transfer.
        dest_repo_id: Node ID of the repository receiving the issue.

    Returns:
        The number of the issue in the destination repository.

    Raises:
        requests.HTTPError: If the API responds with an HTTP error status.
        Exception: If the GraphQL response reports errors.
    """
    # IDs are passed as GraphQL variables rather than spliced into the
    # query text, so they can never alter the shape of the mutation
    payload = {
        'query': """
        mutation($issueId: ID!, $repositoryId: ID!) {
          transferIssue(input: {issueId: $issueId, repositoryId: $repositoryId}) {
            issue {
              number
            }
          }
        }
        """,
        'variables': {'issueId': issue_id, 'repositoryId': dest_repo_id},
    }
    response = requests.post(
        'https://api.github.com/graphql',
        json=payload,
        headers={'Authorization': f'Bearer {auth_token}'},
    )
    response.raise_for_status()
    body = response.json()
    # GraphQL failures come back as HTTP 200 with an "errors" list
    errors = body.get('errors')
    if errors:
        raise Exception(f'error transferring issue {issue_id}: {errors}')
    return body['data']['transferIssue']['issue']['number']
def gh_token():
    """Return the token printed by `gh auth token`.

    This is a shortcut for obtaining a GitHub API token from the GitHub CLI.
    """
    token_cmd = ['gh', 'auth', 'token']
    return run(token_cmd)
def new_gh(token):
    """Build a `github.Github` client for `token` that retries via `RateLimitRetry`."""
    retry_policy = RateLimitRetry()
    return github.Github(token, retry=retry_policy)
def find_unglobbed_files(repo_dir: str, globs: list[str]):
    """Find the files listed by `git ls-files` that match none of the given globs.

    Args:
        repo_dir: Directory of the git repository to inspect.
        globs: Patterns tested against each path with `fnmatch.fnmatch`.

    Returns:
        The listed paths, in `git ls-files` order, that no glob matched.
    """
    listing = run(['git', 'ls-files'], wd=repo_dir)
    tracked = [name.strip() for name in listing.split('\n')]
    return [
        name
        for name in tracked
        # An empty listing splits into [''], which is not a real path
        if name and not any(fnmatch.fnmatch(name, pattern) for pattern in globs)
    ]
class Callbacks(object):
    """Commit-rewriting callbacks used while filtering the source repository."""

    def __init__(self, source_repo: Repository):
        # Only `full_name` of the source repo is read by the callbacks
        self._source_repo = source_repo

    def commit_callback(self, commit: fr.Commit, metadata):
        """Callback for modifying commits when moving repos.

        Rewrites "Merge pull request <ref> from <branch>" lines so the
        reference is prefixed with the source repo's full name, and appends
        a note naming the original repo and commit ID when one is available.
        `commit.message` is updated in place; `metadata` is unused.
        """
        # Commit messages are byte strings and are not guaranteed to be valid
        # UTF-8; surrogateescape lets undecodable bytes round-trip unchanged
        # instead of raising and aborting the whole migration
        msg = commit.message.decode(errors='surrogateescape')
        if 'Merge pull request' in msg:
            msg = re.sub(
                r'Merge pull request (.+) from (.*)',
                f'Merge pull request {self._source_repo.full_name}\\1 from \\2',
                msg,
            )
        original_id = commit.original_id
        if original_id:
            msg += f'\n\nThis commit was moved from {self._source_repo.full_name}@{original_id.decode()}'
        commit.message = msg.encode(errors='surrogateescape')
def clone_repo(dest_path: str, repo: Repository) -> str:
    """Clone `repo` into a new `<dest_path>/<repo.name>` directory.

    The directory is created first with `os.mkdir`, then `git clone` is
    pointed at it using the repository's `clone_url`.

    Returns:
        The path of the cloned repository directory, as a string.
    """
    print(f'Cloning {repo.full_name}')
    target = pathlib.Path(dest_path).joinpath(repo.name)
    os.mkdir(target)
    clone_cmd = ['git', 'clone', repo.clone_url, target]
    run(clone_cmd)
    return str(target)
def filter_repo(callbacks: Callbacks, source_repo_path: str, globs, dest_subdir: str):
    """Run git-filter-repo inside `source_repo_path`.

    Args:
        callbacks: Supplies the `commit_callback` applied to each commit.
        source_repo_path: Directory the filter is run from.
        globs: Each entry is passed as a `--path-glob` argument.
        dest_subdir: When truthy, passed as `--to-subdirectory-filter`.
    """
    fr_args = ['--quiet']
    if dest_subdir:
        fr_args += ['--to-subdirectory-filter', dest_subdir]
    for glob in globs:
        fr_args += ['--path-glob', glob]

    # The filter is run from inside the source repo. The previous working
    # directory is restored afterwards so that relative paths used by the
    # caller keep resolving against the directory the tool was started from.
    previous_cwd = os.getcwd()
    os.chdir(source_repo_path)
    try:
        args = fr.FilteringOptions.parse_args(fr_args)
        repo_filter = fr.RepoFilter(args, commit_callback=callbacks.commit_callback)
        repo_filter.run()
    finally:
        os.chdir(previous_cwd)
def migrate_repo(gh: github.Github,
                 source_repo: str,
                 source_branch: str,
                 source_path: str,
                 globs: list[str],
                 dest_path: str,
                 dest_repo: str,
                 dest_subdir: str,
                 dest_branch: str,
                 dest_skip_clone: bool):
    """Filter the source repo and merge the result into a branch of the destination repo.

    Args:
        gh: GitHub client used to look up both repositories.
        source_repo: Source repo name, looked up with `gh.get_repo`.
        source_branch: Branch to pull from; the source repo's default branch
            is used when this is falsy.
        source_path: Directory the source repo is cloned into.
        globs: Globs selecting the source files to keep.
        dest_path: Directory the destination repo is cloned into, or the
            destination repo directory itself when `dest_skip_clone` is set.
        dest_repo: Destination repo name, looked up with `gh.get_repo`.
        dest_subdir: Optional subdirectory the filtered files are moved under.
        dest_branch: Branch created (or reset) in the destination repo.
        dest_skip_clone: Use `dest_path` as an existing clone instead of cloning.

    Returns:
        The absolute path of the destination repo directory.

    Raises:
        Exception: If `dest_skip_clone` is set without a `dest_path`, or if
            any git command fails.
    """
    # Validate the arguments before any network or clone work is done
    if dest_skip_clone and not dest_path:
        raise Exception("Must specify a destination path to skip cloning")

    source_gh_repo = gh.get_repo(source_repo)
    dest_gh_repo = gh.get_repo(dest_repo)
    if not source_branch:
        source_branch = source_gh_repo.default_branch

    # Both paths are made absolute up front: filtering changes the process
    # working directory, which would otherwise change what a relative path
    # refers to in the git commands below
    source_repo_dir = os.path.abspath(clone_repo(source_path, source_gh_repo))
    if dest_skip_clone:
        dest_repo_dir = dest_path
    else:
        dest_repo_dir = clone_repo(dest_path, dest_gh_repo)
    dest_repo_dir = os.path.abspath(dest_repo_dir)

    print()
    for unglobbed_file in find_unglobbed_files(source_repo_dir, globs):
        print(f'Skipping unmatched file {unglobbed_file} ')
    print()

    callbacks = Callbacks(source_gh_repo)
    filter_repo(callbacks, source_repo_dir, globs, dest_subdir)

    # Pull the filtered history into a temporary branch of the destination
    # repo, then point the requested branch at the result
    run(['git', 'remote', 'add', 'src-repo', source_repo_dir], wd=dest_repo_dir)
    run(['git', 'checkout', '-B', 'tmp-migrate-branch'], wd=dest_repo_dir)
    run(['git', 'pull', '--allow-unrelated-histories', '--no-rebase', 'src-repo', source_branch], wd=dest_repo_dir)
    run(['git', 'checkout', '-B', dest_branch], wd=dest_repo_dir)
    run(['git', 'merge', 'tmp-migrate-branch'], wd=dest_repo_dir)
    # Replace the message of the tip commit with one naming the source
    run(['git', 'commit', '--amend', '-m', f'Merge commits from {source_repo}/{source_branch}'], wd=dest_repo_dir)
    run(['git', 'remote', 'remove', 'src-repo'], wd=dest_repo_dir)
    return dest_repo_dir
@click.command(name='repo', help='''Move a set of files/dirs from one GitHub repo to another into a subdirectory, preserving the history.
It's recommended to transfer into a new subdirectory to avoid having to deal with conflicts, and then refactoring the directory structure in a separate PR afterwards.
Some commit messages are rewritten so that GitHub links will continue to work correctly, such as links to pull requests in merge commits. Each commit message will also have a note added to the bottom of it explaining that the commit was transferred, and from where.
This requires an installed and configured GitHub CLI, see https://cli.github.com/.
''')
@click.option('--source-repo', required=True, help='the source repo, in the form <owner>/<name>, such as "ipfs/kubo"')
@click.option('--source-branch', help='the source repo branch to use, defaults to the GitHub default branch (usually "master" or "main")')
@click.option('--glob', required=True, multiple=True, help='glob indicating the set of dirs and files in the source repo to move to the destination repo; can be specified multiple times')
@click.option('--dest-repo', required=True, help='the destination repo, in the form <owner>/<name>, such as "ipfs/kubo"')
@click.option('--dest-subdir', help='the relative subdirectory in the destination repo to place the files from the source repo')
@click.option('--dest-branch', required=True, help='the branch to create in the destination repo to contain the changes')
@click.option('--dest-path', required=False, help='the filesystem path of the destination repo, defaults to a new temp dir')
@click.option('--dest-skip-clone', is_flag=True, default=False, required=False, help='skip cloning the destination repo (useful if the repo already exists at the destination path)')
def migrate_repo_cmd(source_repo, source_branch, glob, dest_repo, dest_subdir, dest_branch, dest_path, dest_skip_clone):
    # CLI entry point for the `repo` command: prepares working directories,
    # runs the migration and prints the follow-up steps.

    # This must be checked before dest_path is defaulted to a temp dir below,
    # otherwise the git commands would run in an empty directory
    if dest_skip_clone and not dest_path:
        raise click.UsageError('--dest-skip-clone requires --dest-path')

    globs = list(glob)
    gh = new_gh(gh_token())

    if not dest_path:
        dest_path = tempfile.mkdtemp()
    os.makedirs(dest_path, exist_ok=True)

    # mkdtemp already creates the directory the source repo is cloned into
    source_path = tempfile.mkdtemp()

    dest_repo_dir = migrate_repo(
        gh,
        source_repo,
        source_branch,
        source_path,
        globs,
        dest_path,
        dest_repo,
        dest_subdir,
        dest_branch,
        dest_skip_clone,
    )
    print(f'\n\nWork done in repo: {dest_repo_dir}')
    print('''Switch to that directory and perform any necessary followup actions such as:
- Finish the merge if there was a conflict
- Run "go mod tidy" and fix up any dependency issues
- Wire the change into upstream dependencies, rewrite import paths, and rerun tests
- Push the branch & open a pull request\n''')
@click.command(name='issues', help='Migrate issues from one repo to another.')
@click.option('--source-repo', required=True)
@click.option('--dest-repo', required=True)
def migrate_issues_cmd(source_repo, dest_repo):
    # CLI entry point for the `issues` command: transfers every open issue
    # found by the search query, in ascending issue-number order, and
    # prefixes each transferred issue's title with the source repo name.
    api_token = gh_token()
    client = new_gh(api_token)
    target_repo = client.get_repo(dest_repo)
    target_node_id = target_repo.raw_data['node_id']

    open_issues = list(client.search_issues(f'is:issue state:open repo:{source_repo}'))
    open_issues.sort(key=lambda found: found.number)

    for old_issue in open_issues:
        moved_number = transfer_issue(api_token, old_issue.raw_data['node_id'], target_node_id)
        moved_issue = target_repo.get_issue(moved_number)
        retitled = f'[{source_repo}] {moved_issue.title}'
        moved_issue.edit(title=retitled)
        print(f'Transferred issue from {old_issue.html_url} to {moved_issue.html_url}')
@click.command(name='clean-pull-requests', help='Clean all open pull requests by leaving a note about the migration and then closing the PR.')
@click.option('--source-repo', required=True)
@click.option('--dest-repo', required=True)
def clean_pull_requests_cmd(source_repo, dest_repo):
    # CLI entry point for the `clean-pull-requests` command: comments on and
    # then closes every open pull request of the source repo.
    token = gh_token()
    gh = new_gh(token)
    source_gh_repo = gh.get_repo(source_repo)
    dest_gh_repo = gh.get_repo(dest_repo)

    # Snapshot the open PRs before closing any of them: the listing is
    # fetched page by page, and closing PRs mid-iteration would shift the
    # remaining pages so that some PRs are never visited
    open_prs = list(source_gh_repo.get_pulls(state='open'))

    for pr in open_prs:
        print(f'Closing PR: {pr.html_url}')
        pr.create_issue_comment(
            f'This repository has been moved to {dest_gh_repo.html_url}. '
            'There is not an easy way to transfer PRs, so if you would like to continue with this PR '
            'then please re-open it in the new repository and link to this PR.'
        )
        pr.edit(state='closed')
@click.group()
def migrate():
    # Root command group; the subcommands are attached below.
    pass


# Register every subcommand on the group, in the original order.
for _subcommand in (migrate_repo_cmd, migrate_issues_cmd, clean_pull_requests_cmd):
    migrate.add_command(_subcommand)

# NOTE(review): this runs the CLI at import time as well as when executed as
# a script; consider an `if __name__ == '__main__':` guard.
migrate()