Skip to content

Commit

Permalink
Merge pull request #173 from ebmdatalab/Jongmassey/codespaces-usage
Browse files Browse the repository at this point in the history
Add Codespace usage metrics
  • Loading branch information
Jongmassey authored May 31, 2024
2 parents 4627d70 + 3b7b084 commit 2d16d4b
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 2 deletions.
7 changes: 7 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dokku$ dokku git:set metrics deploy-branch main
```bash
dokku config:set metrics GITHUB_EBMDATALAB_TOKEN='xxx'
dokku config:set metrics GITHUB_OS_CORE_TOKEN='xxx'
dokku config:set metrics GITHUB_OS_TOKEN='xxx'
dokku config:set metrics SLACK_SIGNING_SECRET='xxx'
dokku config:set metrics SLACK_TECH_SUPPORT_CHANNEL_ID='xxx'
dokku config:set metrics SLACK_TOKEN='xxx'
Expand All @@ -22,6 +23,12 @@ Each token is assigned to a single organisation and should have the following *r
* *all repositories* owned by the organisation with the following permissions:
Code scanning alerts, Dependabot alerts, Metadata, Pull requests and Repository security advisories

The `GITHUB_OS_TOKEN` is a fine-grained GitHub personal access token that is used for authenticating with the GitHub REST API.
It is assigned to a single organisation and should have the following *read-only* permissions:
* organisation permissions: codespaces
* *all repositories* owned by the organisation with the following permissions:
Codespaces and Metadata

## Disable checks
Dokku performs health checks on apps during deploy by sending requests to port 80.
This tool isn't a web app so it can't accept requests on a port.
Expand Down
1 change: 1 addition & 0 deletions dotenv-sample
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ TIMESCALEDB_URL=postgresql://user:pass@localhost:5433/metrics
# API tokens for pulling data from Github
GITHUB_EBMDATALAB_TOKEN=
GITHUB_OS_CORE_TOKEN=
GITHUB_OS_TOKEN=

# Slack API access credentials.
# The slack app used for this will need the following OAuth scopes:
Expand Down
8 changes: 8 additions & 0 deletions metrics/github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ def rest_query(self, path, **variables):
data = response.json()
if isinstance(data, list):
yield from data

# Unlike the team repositories endpoint or the team members endpoint,
# which return arrays of the objects we're interested in,
# the codespaces endpoint returns an object.
# This object has a codespaces key,
# whose value is an array of the objects we're interested in.
elif "codespaces" in path and isinstance(data, dict):
yield from data["codespaces"]
else:
raise RuntimeError("Unexpected response format:", data)

Expand Down
26 changes: 26 additions & 0 deletions metrics/github/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,32 @@ def from_dict(cls, data, repo):
)


@dataclass(frozen=True)
class Codespace:
org: str
repo_name: str
user: str
created_at: datetime.datetime
last_used_at: datetime.datetime

@classmethod
def from_dict(cls, data, org):
return cls(
org=org,
repo_name=data["repository"]["name"],
user=data["owner"]["login"],
created_at=data["created_at"],
last_used_at=data["last_used_at"],
)


def codespaces(org):
return [
Codespace.from_dict(data=codespace, org=org)
for codespace in query.codespaces(org)
]


def tech_prs():
tech_team_members = _tech_team_members()
return [
Expand Down
13 changes: 13 additions & 0 deletions metrics/github/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,16 @@ def convert_issue_counts_to_metrics(counts):
}
)
return metrics


def convert_codespaces_to_dicts(codespaces):
return [
{
"organisation": c.org,
"repo": c.repo_name,
"user": c.user,
"created_at": c.created_at,
"last_used_at": c.last_used_at,
}
for c in codespaces
]
5 changes: 5 additions & 0 deletions metrics/github/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,16 @@ def issues(org, repo):
)


def codespaces(org):
yield from _client().rest_query("/orgs/{org}/codespaces", org=org)


def _client():
return GitHubClient(
tokens={
"ebmdatalab": os.environ["GITHUB_EBMDATALAB_TOKEN"],
"opensafely-core": os.environ["GITHUB_OS_CORE_TOKEN"],
"opensafely": os.environ["GITHUB_OS_TOKEN"],
}
)

Expand Down
24 changes: 24 additions & 0 deletions metrics/tasks/codespaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys

import structlog

import metrics.github.github as github
from metrics.github.metrics import convert_codespaces_to_dicts
from metrics.timescaledb import db, tables


log = structlog.get_logger()


def main():
log.info("Getting codespaces")
codespaces = github.codespaces(org="opensafely")
log.info(f"Got {len(codespaces)} codespaces")

log.info("Writing data")
db.upsert(tables.GitHubCodespaces, convert_codespaces_to_dicts(codespaces))
log.info("Written data")


if __name__ == "__main__":
sys.exit(main())
44 changes: 42 additions & 2 deletions metrics/timescaledb/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,55 @@ def reset_table(table, batch_size=None):


def write(table, rows):
max_params = 65535 # limit for postgresql
batch_size = max_params // len(table.columns)
batch_size = _batch_size(table)

with _get_engine().begin() as connection:
for values in batched(rows, batch_size):
connection.execute(insert(table).values(values))
log.info("Inserted %s rows", len(values), table=table.name)


def upsert(table, rows):
_ensure_table(table)
batch_size = _batch_size(table)
non_pk_columns = set(table.columns) - set(table.primary_key.columns)

# use the primary key constraint to match rows to be updated,
# using default constraint name if not otherwise specified
constraint = table.primary_key.name or table.name + "_pkey"

with _get_engine().begin() as connection:
for values in batched(rows, batch_size):
# Construct a PostgreSQL "INSERT..ON CONFLICT" style upsert statement
# https://docs.sqlalchemy.org/en/20/dialects/postgresql.html#insert-on-conflict-upsert

# "Vanilla" statement to start, we need this to be able to derive
# the "excluded" columns in the values which we need to use to update
# the target table in case of conflict on the constraint
insert_stmt = insert(table).values(values)

# This dict dicates which columns in the target table are updated (the
# non-PK columns) and the corresponding values with which they are updated
update_set_clause = {
c: insert_stmt.excluded[c.name] for c in non_pk_columns
}

# Extend the insert statement to include checking for row conflicts using
# the primary key constraint and telling the database to update
# the conflicting rows according to the SET clause
insert_stmt = insert_stmt.on_conflict_do_update(
constraint=constraint,
set_=update_set_clause,
)
connection.execute(insert_stmt)
log.info("Inserted %s rows", len(values), table=table.name)


def _batch_size(table):
max_params = 65535 # limit for postgresql
return max_params // len(table.columns)


def _drop_table(table, batch_size):
with _get_engine().begin() as connection:
log.debug("Removing table: %s", table.name)
Expand Down
10 changes: 10 additions & 0 deletions metrics/timescaledb/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@
metadata = MetaData()


GitHubCodespaces = Table(
"github_codespaces",
metadata,
Column("created_at", TIMESTAMP(timezone=True), primary_key=True),
Column("organisation", Text, primary_key=True),
Column("repo", Text, primary_key=True),
Column("user", Text, primary_key=True),
Column("last_used_at", TIMESTAMP(timezone=True)),
)

GitHubRepos = Table(
"github_repos",
metadata,
Expand Down
17 changes: 17 additions & 0 deletions tests/metrics/github/test_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,23 @@ def fake(*keys):
return patch


def test_codespaces(patch):
patch(
"codespaces",
{
"opensafely": [
{
"owner": {"login": "testuser"},
"repository": {"name": "testrepo"},
"created_at": datetime.datetime.now().isoformat(),
"last_used_at": datetime.datetime.now().isoformat(),
},
]
},
)
assert len(github.codespaces("opensafely")) == 1


def test_includes_tech_owned_repos(patch):
patch(
"team_repos",
Expand Down
26 changes: 26 additions & 0 deletions tests/metrics/timescaledb/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,29 @@ def test_write(engine, table):
# check rows are in table
rows = get_rows(engine, table)
assert len(rows) == 3


def test_upsert(engine, table):
# add a non-PK column to the table
table.append_column(Column("value2", Text))

# insert initial rows
rows = [{"value": i, "value2": "a"} for i in range(1, 4)]
db.upsert(table, rows)

# second batch of rows, some with preexisting value1, some new
# all with different value2
rows = [{"value": i, "value2": "b"} for i in range(3, 6)]
db.upsert(table, rows)

# check all rows are in table
rows = get_rows(engine, table)
assert len(rows) == 5

# check upsert leaves unmatched rows 1-2 intact
original_rows = [r for r in rows if int(r[0]) < 3]
assert original_rows == [("1", "a"), ("2", "a")]

# check upsert modifies matched row 3 and new rows 4-5
modified_rows = [r for r in rows if int(r[0]) >= 3]
assert modified_rows == [("3", "b"), ("4", "b"), ("5", "b")]

0 comments on commit 2d16d4b

Please sign in to comment.