Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Podcast Merge Queue [WIP] #60

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions mygpo/administration/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,18 @@ def __get_episodes(self):


def group(self, get_features):
""" Groups the episodes by features extracted using ``get_features``

get_features is a callable that expects an episode as parameter, and
returns a value representing the extracted feature(s).
"""

episodes = self.__get_episodes()

episode_groups = defaultdict(list)

episode_features = map(get_features, episodes.items())

for features, episode_id in episode_features:
episode = episodes[episode_id]
for episode in episodes.values():
features = get_features(episode)
episode_groups[features].append(episode)

groups = sorted(episode_groups.values(), key=_SORT_KEY)
Expand Down
17 changes: 11 additions & 6 deletions mygpo/administration/tasks.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import uuid
from collections import Counter

from mygpo.podcasts.models import Podcast
from mygpo.celery import celery
from mygpo.maintenance.merge import PodcastMerger
from mygpo.maintenance.models import MergeTask

from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)


@celery.task
def merge_podcasts(podcast_ids, num_groups):
def merge_podcasts(podcast_ids, num_groups, queue_id=''):
""" Task to merge some podcasts"""

logger.info('merging podcast ids %s', podcast_ids)
Expand All @@ -18,11 +20,14 @@ def merge_podcasts(podcast_ids, num_groups):

logger.info('merging podcasts %s', podcasts)

actions = Counter()

pm = PodcastMerger(podcasts, actions, num_groups)
pm = PodcastMerger(podcasts, num_groups)
podcast = pm.merge()

logger.info('merging result: %s', actions)
logger.info('merging successful')

if queue_id:
qid = uuid.UUID(queue_id)
logger.info('Deleting merge queue entry {}'.format(qid))
MergeTask.objects.filter(id=qid).delete()

return actions, podcast
return podcast
28 changes: 16 additions & 12 deletions mygpo/administration/templates/admin/merge-grouping.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ <h1>{% trans "Merge Podcasts and Episodes" %}</h1>
{% trans "Episodes that have the same number will be merged. Please verify all your changes by clicking on 'Renew Groups' before starting the Merge." %}
</div>

<form method="post" action="{% url "admin-merge-process" %}">
<form method="post" action="{% url "admin-merge-process" task.pk %}">
{% csrf_token %}

{% for podcast in podcasts %}
Expand All @@ -39,18 +39,21 @@ <h1>{% trans "Merge Podcasts and Episodes" %}</h1>
{% endfor %}
</tr>

{% for n, episodes in groups %}
{% for line in groups %}
<tr>
<th>{{ n }}</th>
<th>{{ forloop.counter }}</th>

{% for podcast in podcasts %}
{% for episode in line %}
<td>
{% for episode in episodes %}
{% if episode.podcast == podcast.get_id %}
<input type="text" name="episode_{% get_id episode %}" value="{{ n }}" size="2"/>
{% episode_link episode podcast %}<br />
{% endif %}
{% endfor %}
{% if episode %}
<input
type="text"
name="episode_{{ episode.get_id }}"
value="{{ forloop.parentloop.counter }}"
size="2"
/>
{% episode_link episode episode.podcast %}<br />
{% endif %}
</td>
{% endfor %}

Expand All @@ -59,11 +62,12 @@ <h1>{% trans "Merge Podcasts and Episodes" %}</h1>

<tr>
<td></td>
<td><input type="submit" name="renew" value="Renew Groups" /></td>
<td><input type="submit" name="merge" value="Merge!" /></td>
<td><button formaction="{% url "admin-merge-update" task.pk %}">Renew Groups</button></td>
<td><button>Merge</button></td>
</tr>

</table>

</form>


Expand Down
10 changes: 9 additions & 1 deletion mygpo/administration/templates/admin/merge-select.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ <h1>{% trans "Merge Podcasts and Episodes" %}</h1>
{% endblock %}

{% block content %}
<form method="post" action="{% url "admin-merge-verify" %}" role="form">
<div>
Queue Length: {{ queue_length }}
{% if task %}
- <a href="{% url "admin-merge-verify" task.pk %}">Take from Queue</a>
{% endif %}
</div>

<form method="post" action="{% url "admin-merge-create" %}" role="form">
{% csrf_token %}
{% for url in urls %}
<div class="form-group">
Expand All @@ -22,6 +29,7 @@ <h1>{% trans "Merge Podcasts and Episodes" %}</h1>
</div>
{% endfor %}
<input class="btn btn-default" type="submit" value="OK" />
<input class="hidden" name="queue_id" value="{{ queue_id }}" />
</form>

{% endblock %}
Expand Down
10 changes: 0 additions & 10 deletions mygpo/administration/templates/admin/task-status.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,6 @@ <h1>

{% if ready %}

<p>{% trans "The following actions were recorded:" %}
<ul>
{% for action, count in actions %}
<li>{{ action }}: {{ count }}</li>
{% empty %}
<li><em>{% trans "none" %}</em></li>
{% endfor %}
</ul>
</p>

<p>{% trans "Go to podcast" %} {% podcast_group_link podcast %}</p>
{% else %}
<p>{% trans "The operation is still ongoing..." %}</p>
Expand Down
4 changes: 1 addition & 3 deletions mygpo/administration/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,11 @@ def test_merge(self):
# we need that for later
e3_id = e3.pk

actions = Counter()

# decide which episodes to merge
groups = [(0, [e1]), (1, [e2, e3]), (2, [e4])]

# carry out the merge
pm = PodcastMerger([p1, p2], actions, groups)
pm = PodcastMerger([p1, p2], groups)
pm.merge()

e1 = Episode.objects.get(pk=e1.pk)
Expand Down
14 changes: 13 additions & 1 deletion mygpo/administration/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,23 @@
views.MergeSelect.as_view(),
name='admin-merge'),

path('merge/create',
views.CreateMergeTask.as_view(),
name='admin-merge-create'),

path('merge/verify/<uuid:task_id>',
views.MergeVerify.as_view(),
name='admin-merge-verify'),

path('merge/update/<uuid:task_id>',
views.UpdateMergeTask.as_view(),
name='admin-merge-update'),

path('merge/verify',
views.MergeVerify.as_view(),
name='admin-merge-verify'),

path('merge/process',
path('merge/process/<uuid:task_id>',
views.MergeProcess.as_view(),
name='admin-merge-process'),

Expand Down
116 changes: 56 additions & 60 deletions mygpo/administration/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import socket
import uuid
from itertools import count, chain
from collections import Counter
from datetime import datetime
Expand All @@ -17,15 +18,14 @@
from django.template import RequestContext
from django.utils.translation import ugettext as _
from django.contrib.sites.requests import RequestSite
from django.views.generic import TemplateView
from django.views.generic import TemplateView, View
from django.utils.decorators import method_decorator
from django.conf import settings
from django.contrib.auth import get_user_model

from mygpo.podcasts.models import Podcast, Episode
from mygpo.administration.auth import require_staff
from mygpo.administration.group import PodcastGrouper
from mygpo.maintenance.merge import PodcastMerger, IncorrectMergeException
from mygpo.maintenance.models import MergeTask
from mygpo.administration.clients import UserAgentStats, ClientStats
from mygpo.administration.tasks import merge_podcasts
from mygpo.utils import get_git_head
Expand Down Expand Up @@ -106,18 +106,33 @@ class MergeSelect(AdminView):
template_name = 'admin/merge-select.html'

def get(self, request):
    """ Renders the podcast selection form together with the queue state

    The number of (empty) feed URL fields is taken from the ``podcasts``
    query parameter and defaults to 2.
    """
    queue_length = MergeTask.objects.count()
    # first queued task, or None if the queue is empty
    task = MergeTask.objects.first()

    try:
        num = int(request.GET.get('podcasts', 2))
    except ValueError:
        # a non-numeric query parameter should not break the admin page
        num = 2

    urls = [''] * num
    queue_id = ''

    return self.render_to_response({
        'queue_length': queue_length,
        'urls': urls,
        'task': task,
        # rendered into the hidden ``queue_id`` input of the form
        'queue_id': queue_id,
    })


class MergeBase(AdminView):
class CreateMergeTask(AdminView):

def post(self, request):
    """ Creates a MergeTask from the submitted podcasts

    Redirects to the verification page of the newly created task.
    """
    new_task = MergeTask.objects.create_from_podcasts(
        self._get_podcasts(request)
    )
    verify_url = reverse('admin-merge-verify', args=[new_task.id])
    return HttpResponseRedirect(verify_url)

def _get_podcasts(self, request):
podcasts = []

for n in count():
podcast_url = request.POST.get('feed%d' % n, None)
if podcast_url is None:
Expand All @@ -127,82 +142,68 @@ def _get_podcasts(self, request):
continue

p = Podcast.objects.get(urls__url=podcast_url)
podcasts.append(p)
yield p


return podcasts
# Common base class of the merge views (MergeVerify, MergeProcess); it does
# not add anything to AdminView itself.
class MergeBase(AdminView):
pass


class MergeVerify(MergeBase):

template_name = 'admin/merge-grouping.html'

def post(self, request):

try:
podcasts = self._get_podcasts(request)

grouper = PodcastGrouper(podcasts)

get_features = lambda id_e: ((id_e[1].url, id_e[1].title), id_e[0])

num_groups = grouper.group(get_features)


except InvalidPodcast as ip:
messages.error(request,
_('No podcast with URL {url}').format(url=str(ip)))

podcasts = []
num_groups = []

def get(self, request, task_id):
    """ Shows the episode groups of a MergeTask for verification

    task_id identifies the MergeTask. The ``<uuid:task_id>`` URL converter
    passes a ``uuid.UUID`` instance; a string is accepted as well.
    """
    # uuid.UUID() only parses strings -- wrapping an existing UUID instance
    # raises an AttributeError, so convert only when necessary
    if not isinstance(task_id, uuid.UUID):
        task_id = uuid.UUID(task_id)

    task = MergeTask.objects.get(id=task_id)
    # podcasts ordered by ascending subscriber count
    podcasts = sorted(task.podcasts, key=lambda p: p.subscribers)
    groups = task.episode_groups()

    return self.render_to_response({
        'podcasts': podcasts,
        'groups': groups,
        'task': task,
    })


class MergeProcess(MergeBase):
class UpdateMergeTask(View):

RE_EPISODE = re.compile(r'episode_([0-9a-fA-F]{32})')

def post(self, request, task_id):
    """ Regroups the episodes of a MergeTask according to the posted form

    The values of the ``episode_<id>`` form fields are used as the
    grouping feature of the respective episode. Redirects back to the
    verification page of the task.
    """
    # the URL converter already yields a uuid.UUID, which uuid.UUID() can
    # not parse again; only convert plain strings
    if not isinstance(task_id, uuid.UUID):
        task_id = uuid.UUID(task_id)

    task = MergeTask.objects.get(id=task_id)

    features = self._features_from_post(request.POST)

    def get_features(episode):
        # raises KeyError for an episode that was not part of the form
        return features[episode.id]

    # update groups within MergeTask
    task.set_groups(get_features)
    task.save()

    return HttpResponseRedirect(
        reverse('admin-merge-verify', args=[task.id])
    )

def _features_from_post(self, post):
features = {}
for key, feature in request.POST.items():
for key, feature in post.items():
m = self.RE_EPISODE.match(key)
if m:
episode_id = m.group(1)
episode_id = uuid.UUID(m.group(1))
features[episode_id] = feature

get_features = lambda id_e: (features.get(id_e[0], id_e[0]), id_e[0])
return features

num_groups = grouper.group(get_features)

if 'renew' in request.POST:
return render(request, 'admin/merge-grouping.html', {
'podcasts': podcasts,
'groups': num_groups,
})

class MergeProcess(MergeBase):

elif 'merge' in request.POST:
def post(self, request, task_id):
    """ Starts the asynchronous merge for a MergeTask

    Redirects to the status page of the started celery task.
    """
    # convert only strings: the URL converter already yields a uuid.UUID
    # and uuid.UUID() raises when given a UUID instance
    if not isinstance(task_id, uuid.UUID):
        task_id = uuid.UUID(task_id)

    task = MergeTask.objects.get(id=task_id)

    # NOTE(review): merge_podcasts is declared as
    # (podcast_ids, num_groups, queue_id='') but is called with the task's
    # primary key only -- confirm which side is going to change
    res = merge_podcasts.delay(task.pk)

    return HttpResponseRedirect(
        reverse('admin-merge-status', args=[res.task_id])
    )


class MergeStatus(AdminView):
Expand All @@ -222,16 +223,11 @@ def get(self, request, task_id):
# TODO: what to do with multiple frontends?
cache.clear()

try:
actions, podcast = result.get()

except IncorrectMergeException as ime:
messages.error(request, str(ime))
return HttpResponseRedirect(reverse('admin-merge'))
podcast_id = result.get()
podcast = Podcast.objects.get(id=podcast_id)

return self.render_to_response({
'ready': True,
'actions': actions.items(),
'podcast': podcast,
})

Expand Down
Loading