Skip to content

Commit

Permalink
Improve caching
Browse files Browse the repository at this point in the history
Refs #238
  • Loading branch information
jonashaag committed Jul 4, 2019
1 parent c361ba5 commit 0861604
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 26 deletions.
100 changes: 77 additions & 23 deletions klaus/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
from klaus.diff import render_diff


# Sentinel meaning "no value stored yet".
NOT_SET = '__not_set__'


def cached_call(key, validator, producer, _cache={}):
    """Return the cached result for *key*, recomputing it when stale.

    :param key: hashable identifier of the cached computation.
    :param validator: value describing the current state of the inputs; the
        cached result is reused only while this compares equal to the
        validator stored alongside it.
    :param producer: zero-argument callable computing a fresh result.
    :param _cache: storage for ``key -> (data, validator)`` entries. The
        mutable default is deliberate: it is the shared cache that persists
        across calls. Pass a dict explicitly to use an isolated cache.
    :return: the cached or freshly produced result.
    """
    try:
        data, old_validator = _cache[key]
    except KeyError:
        # No entry yet. Checked explicitly (rather than by comparing against
        # a sentinel validator) so that no validator value, not even
        # NOT_SET itself, can be mistaken for "already cached".
        stale = True
    else:
        stale = old_validator != validator
    if stale:
        data = producer()
        _cache[key] = (data, validator)
    return data


class FancyRepo(dulwich.repo.Repo):
"""A wrapper around Dulwich's Repo that adds some helper methods."""
# TODO: factor out stuff into dulwich
Expand All @@ -31,30 +42,33 @@ def name(self):

def get_last_updated_at(self):
    """Get the commit time of the last commit to this repository.

    The value is computed by ``_get_last_updated_at`` and memoized through
    ``cached_call``; it is None when no ref yields a commit time.
    """
    # Cache result to speed up repo_list.html template.
    # If self.get_refs() has changed, we should invalidate the cache.
    all_refs = self.get_refs()
    return cached_call(
        # NOTE(review): id(self) is only unique among live objects; confirm
        # repo instances outlive their cache entries.
        key=(id(self), 'get_last_updated_at'),
        validator=all_refs,
        producer=lambda: self._get_last_updated_at(all_refs)
    )

def _get_last_updated_at(self, all_refs):
    """Return the newest ``commit_time`` among the objects in *all_refs*.

    :param all_refs: mapping whose values are the object ids to look up in
        this repository (the caller passes the result of ``get_refs()``).
    :return: the largest ``commit_time`` found, or None if no resolvable
        object has one.
    """
    commit_times = []
    # Iterate the hashes (mapping values), not the ref names (mapping keys).
    for ref_hash in all_refs.values():
        try:
            obj = self[ref_hash]
        except KeyError:
            # Whoops. The ref points at a nonexistent object
            continue
        # Only consider objects that have a commit_time; tags do not
        # have a commit time
        if hasattr(obj, 'commit_time'):
            commit_times.append(obj.commit_time)
    return max(commit_times) if commit_times else None

@property
def cloneurl(self):
def get_description(self):
    """Like Dulwich's `get_description`, but returns None if the file
    contains Git's default text "Unnamed repository[...]".

    The result is memoized through ``cached_call`` and recomputed by
    ``_get_description`` when the description file's mtime changes.
    """
    # Cache result to speed up repo_list.html template.
    # If description file mtime has changed, we should invalidate the cache.
    description_file = os.path.join(self._controldir, 'description')
    try:
        description_mtime = os.stat(description_file).st_mtime
    except OSError:
        # The file could not be stat'ed; None then acts as the validator.
        description_mtime = None

    return cached_call(
        key=(id(self), 'get_description'),
        validator=description_mtime,
        producer=self._get_description
    )

def _get_description(self):
description = super(FancyRepo, self).get_description()
if description:
description = force_unicode(description)
Expand Down Expand Up @@ -275,3 +304,28 @@ def raw_commit_diff(self, commit):
bytesio = io.BytesIO()
dulwich.patch.write_tree_diff(bytesio, self.object_store, parent_tree, commit.tree)
return bytesio.getvalue()

def freeze(self):
    """Wrap this repository in a ``FrozenFancyRepo`` and return the wrapper."""
    frozen = FrozenFancyRepo(self)
    return frozen


class FrozenFancyRepo(object):
    """A special version of FancyRepo that assumes the underlying Git
    repository does not change. Used for performance optimizations.

    Attribute reads that are not found on the wrapper are forwarded to the
    wrapped repository. Attribute writes are rejected, except for the
    wrapper's own name-mangled private attributes.
    """
    def __init__(self, repo):
        self.__repo = repo
        self.__last_updated_at = NOT_SET

    def __setattr__(self, name, value):
        # Only the wrapper's own private (name-mangled) state may be set.
        if not name.startswith('_FrozenFancyRepo__'):
            raise TypeError("Can't set %s attribute on FrozenFancyRepo" % name)
        super(FrozenFancyRepo, self).__setattr__(name, value)

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If one of our own
        # private attributes is missing (e.g. on a bare instance created by
        # copy/pickle before __init__ state is restored), reading
        # self.__repo below would re-enter this method and recurse forever.
        if name.startswith('_FrozenFancyRepo__'):
            raise AttributeError(name)
        return getattr(self.__repo, name)

    def fast_get_last_updated_at(self):
        """Return the wrapped repo's ``get_last_updated_at()`` result,
        computing it at most once per frozen instance.
        """
        if self.__last_updated_at is NOT_SET:
            self.__last_updated_at = self.__repo.get_last_updated_at()
        return self.__last_updated_at
2 changes: 1 addition & 1 deletion klaus/templates/repo_list.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h2>
</h2>
<ul class=repolist>
{% for repo in repos %}
{% set last_updated_at = repo.get_last_updated_at() %}
{% set last_updated_at = repo.fast_get_last_updated_at() %}
{% set description = repo.get_description() %}
<li>
<a
Expand Down
5 changes: 3 additions & 2 deletions klaus/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ def repo_list():
if 'by-name' in request.args:
sort_key = lambda repo: repo.name
else:
sort_key = lambda repo: (-(repo.get_last_updated_at() or -1), repo.name)
repos = sorted(current_app.repos.values(), key=sort_key)
sort_key = lambda repo: (-(repo.fast_get_last_updated_at() or -1), repo.name)
repos = sorted([repo.freeze() for repo in current_app.repos.values()],
key=sort_key)
return render_template('repo_list.html', repos=repos, base_href=None)


Expand Down

0 comments on commit 0861604

Please sign in to comment.