Skip to content

Commit

Permalink
'github-to-sqlite starred' command, closes #1
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 14, 2019
1 parent 321e028 commit f1fa627
Show file tree
Hide file tree
Showing 7 changed files with 333 additions and 3 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,15 @@ The `issues` command retrieves all of the issues belonging to a specified reposi
$ github-to-sqlite issues github.db simonw/datasette

If an `auth.json` file is present it will use the token from that file. It works without authentication for public repositories but you should be aware that GitHub have strict IP-based rate limits for unauthenticated requests.

You can point to a different location of `auth.json` using `-a`:

$ github-to-sqlite issues github.db simonw/datasette -a /path/to/auth.json

## Fetching repos that have been starred by a user

The `starred` command fetches the repos that have been starred by a user.

$ github-to-sqlite starred github.db simonw

If you are using an `auth.json` file you can omit the username to retrieve the starred repos for the authenticated user.
42 changes: 42 additions & 0 deletions github_to_sqlite/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,45 @@ def issues(db_path, repo, auth, load):
token = None

utils.save_issues(db, utils.fetch_all_issues(repo, token))


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("username", type=str, required=False)
@click.option(
    "-a",
    "--auth",
    # Deliberately no exists=True: click validates default values as well,
    # so requiring the file to exist would reject the default "auth.json"
    # whenever it is absent and make the unauthenticated fallback below
    # unreachable.
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--load",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    help="Load starred JSON from this file instead of the API",
)
def starred(db_path, username, auth, load):
    "Save repos starred by the specified (or authenticated) username"
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes live in comments instead.
    db = sqlite_utils.Database(db_path)

    # Read the personal token if an auth file is available; a missing file
    # or a file without the expected key means "run unauthenticated".
    try:
        with open(auth) as auth_file:
            token = json.load(auth_file)["github_personal_token"]
    except (KeyError, FileNotFoundError):
        token = None

    # fetch_user / fetch_all_starred need at least one of these; fail with a
    # readable CLI error rather than a bare AssertionError from utils.
    if not (username or token):
        raise click.UsageError(
            "Provide a username, or an auth.json file containing a token"
        )

    if load:
        with open(load) as stars_file:
            stars = json.load(stars_file)
    else:
        stars = utils.fetch_all_starred(username, token)

    # Which user are we talking about here?
    if username:
        user = utils.fetch_user(username, token)
    else:
        user = utils.fetch_user(token=token)

    utils.save_stars(db, user, stars)
    utils.ensure_repo_fts(db)
79 changes: 76 additions & 3 deletions github_to_sqlite/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def save_user(db, user):
for key, value in user.items()
if (key in ("avatar_url", "html_url") or not key.endswith("url"))
}
# If this user was nested in repo they will be missing several fields
# so fill in 'name' from 'login' so Datasette foreign keys display
if to_save.get("name") is None:
to_save["name"] = to_save["login"]
return db["users"].upsert(to_save, pk="id").last_pk


Expand All @@ -71,15 +75,63 @@ def save_milestone(db, milestone):
)


def fetch_repo(repo, token=None):
    """Return the decoded JSON for one repository from the GitHub API.

    ``repo`` is an ``owner/name`` string; it is split on "/" and must yield
    exactly two parts. ``token`` is passed to ``make_headers`` to build the
    request headers.
    """
    owner, slug = repo.split("/")
    response = requests.get(
        "https://api.github.com/repos/{}/{}".format(owner, slug),
        headers=make_headers(token),
    )
    return response.json()


def save_repo(db, repo):
    """Upsert ``repo`` into the ``repos`` table and return its primary key.

    Every key ending in "url" is dropped except ``html_url``. The nested
    ``owner`` record is stored through ``save_user`` and replaced by the
    value that call returns, which becomes a foreign key to ``users.id``.
    """
    to_save = {}
    for key, value in repo.items():
        # Skip all *url fields other than html_url
        if key != "html_url" and key.endswith("url"):
            continue
        to_save[key] = value

    to_save["owner"] = save_user(db, to_save["owner"])

    result = db["repos"].upsert(
        to_save, pk="id", foreign_keys=(("owner", "users", "id"),)
    )
    return result.last_pk


def ensure_repo_fts(db):
    """Enable full-text search on ``repos`` unless ``repos_fts`` already exists.

    Indexes the ``name`` and ``description`` columns and asks for triggers
    to be created.
    """
    if "repos_fts" in db.table_names():
        return
    db["repos"].enable_fts(["name", "description"], create_triggers=True)


def fetch_all_issues(repo, token=None):
    """Yield every issue of ``repo`` (an ``owner/name`` string), page by page.

    Requests issues in all states (``state=all&filter=all``) and follows
    pagination through ``paginate``. ``token`` is passed to ``make_headers``
    to build the request headers.
    """
    # make_headers is the single place that builds the Authorization header;
    # the hand-rolled copy that used to precede it was immediately overwritten.
    headers = make_headers(token)
    url = "https://api.github.com/repos/{}/issues?state=all&filter=all".format(repo)
    for issues in paginate(url, headers):
        yield from issues


def fetch_all_starred(username=None, token=None):
    """Yield starred-repo records for ``username`` or the token's own user.

    With a ``username`` the ``/users/<username>/starred`` endpoint is used,
    otherwise ``/user/starred``. The ``star+json`` Accept header is sent on
    every request. This is a generator, so the argument check runs on first
    iteration.
    """
    assert username or token, "Must provide username= or token= or both"
    request_headers = make_headers(token)
    request_headers["Accept"] = "application/vnd.github.v3.star+json"
    url = (
        "https://api.github.com/users/{}/starred".format(username)
        if username
        else "https://api.github.com/user/starred"
    )
    for page in paginate(url, request_headers):
        for star in page:
            yield star


def fetch_user(username=None, token=None):
    """Return the decoded JSON profile for ``username`` or the token's user.

    With a ``username`` the ``/users/<username>`` endpoint is used, otherwise
    ``/user``. At least one of the two arguments must be truthy.
    """
    assert username or token, "Must provide username= or token= or both"
    url = (
        "https://api.github.com/users/{}".format(username)
        if username
        else "https://api.github.com/user"
    )
    response = requests.get(url, headers=make_headers(token))
    return response.json()


def paginate(url, headers=None):
while url:
response = requests.get(url, headers=headers)
Expand All @@ -88,3 +140,24 @@ def paginate(url, headers=None):
except AttributeError:
url = None
yield response.json()


def make_headers(token=None):
    """Build the request headers dict for a GitHub API call.

    Returns a new empty dict when ``token`` is None, otherwise a dict with a
    single ``Authorization: token <token>`` entry.
    """
    if token is None:
        return {}
    return {"Authorization": "token {}".format(token)}


def save_stars(db, user, stars):
    """Store ``user`` and one ``stars`` row per starred repo.

    Each item of ``stars`` must provide ``starred_at`` and ``repo``. The repo
    is stored through ``save_repo``, then a row keyed on ``(user, repo)`` is
    upserted into ``stars`` with both columns declared as foreign keys.
    """
    user_id = save_user(db, user)

    for star in stars:
        when = star["starred_at"]
        repo_pk = save_repo(db, star["repo"])
        row = {"user": user_id, "repo": repo_pk, "starred_at": when}
        db["stars"].upsert(row, pk=("user", "repo"), foreign_keys=("user", "repo"))
99 changes: 99 additions & 0 deletions tests/starred.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
[
{
"starred_at": "2019-09-14T08:35:12Z",
"repo": {
"id": 123,
"node_id": "MDEwOlJlcG9zaccbcckyMDgzNjkxNTM=",
"name": "repo-name",
"full_name": "owner-name/repo-name",
"private": false,
"owner": {
"login": "owner-name",
"id": 456,
"node_id": "OANEUTHEUONTH=",
"avatar_url": "https://avatars2.githubusercontent.com/u/456?v=4",
"gravatar_id": "",
"url": "https://api.github.com/users/owner-name",
"html_url": "https://github.com/owner-name",
"followers_url": "https://api.github.com/users/owner-name/followers",
"following_url": "https://api.github.com/users/owner-name/following{/other_user}",
"gists_url": "https://api.github.com/users/owner-name/gists{/gist_id}",
"starred_url": "https://api.github.com/users/owner-name/starred{/owner}{/repo}",
"subscriptions_url": "https://api.github.com/users/owner-name/subscriptions",
"organizations_url": "https://api.github.com/users/owner-name/orgs",
"repos_url": "https://api.github.com/users/owner-name/repos",
"events_url": "https://api.github.com/users/owner-name/events{/privacy}",
"received_events_url": "https://api.github.com/users/owner-name/received_events",
"type": "User",
"site_admin": false
},
"html_url": "https://github.com/owner-name/repo-name",
"description": "Repo description",
"fork": false,
"url": "https://api.github.com/repos/owner-name/repo-name",
"forks_url": "https://api.github.com/repos/owner-name/repo-name/forks",
"keys_url": "https://api.github.com/repos/owner-name/repo-name/keys{/key_id}",
"collaborators_url": "https://api.github.com/repos/owner-name/repo-name/collaborators{/collaborator}",
"teams_url": "https://api.github.com/repos/owner-name/repo-name/teams",
"hooks_url": "https://api.github.com/repos/owner-name/repo-name/hooks",
"issue_events_url": "https://api.github.com/repos/owner-name/repo-name/issues/events{/number}",
"events_url": "https://api.github.com/repos/owner-name/repo-name/events",
"assignees_url": "https://api.github.com/repos/owner-name/repo-name/assignees{/user}",
"branches_url": "https://api.github.com/repos/owner-name/repo-name/branches{/branch}",
"tags_url": "https://api.github.com/repos/owner-name/repo-name/tags",
"blobs_url": "https://api.github.com/repos/owner-name/repo-name/git/blobs{/sha}",
"git_tags_url": "https://api.github.com/repos/owner-name/repo-name/git/tags{/sha}",
"git_refs_url": "https://api.github.com/repos/owner-name/repo-name/git/refs{/sha}",
"trees_url": "https://api.github.com/repos/owner-name/repo-name/git/trees{/sha}",
"statuses_url": "https://api.github.com/repos/owner-name/repo-name/statuses/{sha}",
"languages_url": "https://api.github.com/repos/owner-name/repo-name/languages",
"stargazers_url": "https://api.github.com/repos/owner-name/repo-name/stargazers",
"contributors_url": "https://api.github.com/repos/owner-name/repo-name/contributors",
"subscribers_url": "https://api.github.com/repos/owner-name/repo-name/subscribers",
"subscription_url": "https://api.github.com/repos/owner-name/repo-name/subscription",
"commits_url": "https://api.github.com/repos/owner-name/repo-name/commits{/sha}",
"git_commits_url": "https://api.github.com/repos/owner-name/repo-name/git/commits{/sha}",
"comments_url": "https://api.github.com/repos/owner-name/repo-name/comments{/number}",
"issue_comment_url": "https://api.github.com/repos/owner-name/repo-name/issues/comments{/number}",
"contents_url": "https://api.github.com/repos/owner-name/repo-name/contents/{+path}",
"compare_url": "https://api.github.com/repos/owner-name/repo-name/compare/{base}...{head}",
"merges_url": "https://api.github.com/repos/owner-name/repo-name/merges",
"archive_url": "https://api.github.com/repos/owner-name/repo-name/{archive_format}{/ref}",
"downloads_url": "https://api.github.com/repos/owner-name/repo-name/downloads",
"issues_url": "https://api.github.com/repos/owner-name/repo-name/issues{/number}",
"pulls_url": "https://api.github.com/repos/owner-name/repo-name/pulls{/number}",
"milestones_url": "https://api.github.com/repos/owner-name/repo-name/milestones{/number}",
"notifications_url": "https://api.github.com/repos/owner-name/repo-name/notifications{?since,all,participating}",
"labels_url": "https://api.github.com/repos/owner-name/repo-name/labels{/name}",
"releases_url": "https://api.github.com/repos/owner-name/repo-name/releases{/id}",
"deployments_url": "https://api.github.com/repos/owner-name/repo-name/deployments",
"created_at": "2019-09-14T00:50:14Z",
"updated_at": "2019-09-14T14:28:32Z",
"pushed_at": "2019-09-14T07:02:40Z",
"git_url": "git://github.com/owner-name/repo-name.git",
"ssh_url": "git@github.com:owner-name/repo-name.git",
"clone_url": "https://github.com/owner-name/repo-name.git",
"svn_url": "https://github.com/owner-name/repo-name",
"homepage": null,
"size": 7,
"stargazers_count": 2,
"watchers_count": 2,
"language": "Python",
"has_issues": true,
"has_projects": true,
"has_downloads": true,
"has_wiki": true,
"has_pages": false,
"forks_count": 0,
"mirror_url": null,
"archived": false,
"disabled": false,
"open_issues_count": 0,
"license": null,
"forks": 0,
"open_issues": 0,
"watchers": 2,
"default_branch": "master"
}
}
]
1 change: 1 addition & 0 deletions tests/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def test_users(db):
"html_url": "https://github.com/simonw",
"type": "User",
"site_admin": 0,
"name": "simonw",
}
] == user_rows

Expand Down
70 changes: 70 additions & 0 deletions tests/test_starred.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from github_to_sqlite import utils
import json
import pathlib
import pytest
import sqlite_utils


@pytest.fixture
def starred():
    """Return the parsed contents of ``starred.json`` next to this test file."""
    fixture_path = pathlib.Path(__file__).parent / "starred.json"
    # Context manager so the file handle is closed once parsing finishes.
    with open(fixture_path) as fp:
        return json.load(fp)


@pytest.fixture
def user():
    """Return the parsed contents of ``user.json`` next to this test file."""
    fixture_path = pathlib.Path(__file__).parent / "user.json"
    # Context manager so the file handle is closed once parsing finishes.
    with open(fixture_path) as fp:
        return json.load(fp)


@pytest.fixture
def db(starred, user):
    """In-memory database filled by ``utils.save_stars`` from the fixtures."""
    database = sqlite_utils.Database(memory=True)
    utils.save_stars(database, user, starred)
    return database


def test_tables(db):
    """Saving stars creates exactly the repos, stars and users tables."""
    expected = {"repos", "stars", "users"}
    assert set(db.table_names()) == expected


def test_repos(db):
    """The single starred repo is stored without its *_url fields.

    ``html_url`` is kept, booleans come back as 0/1 and ``owner`` holds the
    owner's user id.
    """
    expected_repo = {
        "id": 123,
        "node_id": "MDEwOlJlcG9zaccbcckyMDgzNjkxNTM=",
        "name": "repo-name",
        "full_name": "owner-name/repo-name",
        "private": 0,
        "owner": 456,
        "html_url": "https://github.com/owner-name/repo-name",
        "description": "Repo description",
        "fork": 0,
        "created_at": "2019-09-14T00:50:14Z",
        "updated_at": "2019-09-14T14:28:32Z",
        "pushed_at": "2019-09-14T07:02:40Z",
        "homepage": None,
        "size": 7,
        "stargazers_count": 2,
        "watchers_count": 2,
        "language": "Python",
        "has_issues": 1,
        "has_projects": 1,
        "has_downloads": 1,
        "has_wiki": 1,
        "has_pages": 0,
        "forks_count": 0,
        "archived": 0,
        "disabled": 0,
        "open_issues_count": 0,
        "license": None,
        "forks": 0,
        "open_issues": 0,
        "watchers": 2,
        "default_branch": "master",
    }
    stored = list(db["repos"].rows)
    assert stored == [expected_repo]


def test_stars(db):
    """One stars row links user 9599 to repo 123 with its starred_at time."""
    expected = [{"user": 9599, "repo": 123, "starred_at": "2019-09-14T08:35:12Z"}]
    rows = list(db["stars"].rows)
    assert rows == expected
33 changes: 33 additions & 0 deletions tests/user.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"login": "simonw",
"id": 9599,
"node_id": "MDQ6VXNlcjk1OTk=",
"avatar_url": "https://avatars0.githubusercontent.com/u/9599?v=4",
"gravatar_id": "",
"url": "https://api.github.com/users/simonw",
"html_url": "https://github.com/simonw",
"followers_url": "https://api.github.com/users/simonw/followers",
"following_url": "https://api.github.com/users/simonw/following{/other_user}",
"gists_url": "https://api.github.com/users/simonw/gists{/gist_id}",
"starred_url": "https://api.github.com/users/simonw/starred{/owner}{/repo}",
"subscriptions_url": "https://api.github.com/users/simonw/subscriptions",
"organizations_url": "https://api.github.com/users/simonw/orgs",
"repos_url": "https://api.github.com/users/simonw/repos",
"events_url": "https://api.github.com/users/simonw/events{/privacy}",
"received_events_url": "https://api.github.com/users/simonw/received_events",
"type": "User",
"site_admin": false,
"name": "Simon Willison",
"company": "-",
"blog": "https://simonwillison.net/",
"location": "San Francisco, CA",
"email": null,
"hireable": null,
"bio": null,
"public_repos": 218,
"public_gists": 191,
"followers": 1269,
"following": 129,
"created_at": "2008-05-07T17:22:14Z",
"updated_at": "2019-09-09T02:43:29Z"
}

0 comments on commit f1fa627

Please sign in to comment.