diff --git a/mygpo/administration/templates/admin/merge-grouping.html b/mygpo/administration/templates/admin/merge-grouping.html index dc4354884..7b48c5fb6 100644 --- a/mygpo/administration/templates/admin/merge-grouping.html +++ b/mygpo/administration/templates/admin/merge-grouping.html @@ -24,7 +24,7 @@

{% trans "Merge Podcasts and Episodes" %}

{% trans "Episodes that have the same number will be merged. Please verify all your changes by clicking on 'Renew Groups' before starting the Merge." %} -
+ {% csrf_token %} {% for podcast in podcasts %} @@ -39,18 +39,21 @@

{% trans "Merge Podcasts and Episodes" %}

{% endfor %} - {% for n, episodes in groups %} + {% for line in groups %} - {{ n }} + {{ forloop.counter }} - {% for podcast in podcasts %} + {% for episode in line %} - {% for episode in episodes %} - {% if episode.podcast.get_id == podcast.get_id %} - - {% episode_link episode podcast %}
- {% endif %} - {% endfor %} + {% if episode %} + + {% episode_link episode episode.podcast %}
+ {% endif %} {% endfor %} @@ -59,14 +62,12 @@

{% trans "Merge Podcasts and Episodes" %}

- - + + - -
diff --git a/mygpo/administration/templates/admin/merge-select.html b/mygpo/administration/templates/admin/merge-select.html index 6e1fc40c5..56d2170e4 100644 --- a/mygpo/administration/templates/admin/merge-select.html +++ b/mygpo/administration/templates/admin/merge-select.html @@ -13,14 +13,14 @@

{% trans "Merge Podcasts and Episodes" %}

{% endblock %} {% block content %} - {% if queue_length > 0 %}
- Queue Length: {{ queue_length }} - - Take from Queue + Queue Length: {{ queue_length }} + {% if task %} + - Take from Queue + {% endif %}
- {% endif %} -
+ {% csrf_token %} {% for url in urls %}
diff --git a/mygpo/administration/templates/admin/task-status.html b/mygpo/administration/templates/admin/task-status.html index 9abe6bdef..d7da3dff1 100644 --- a/mygpo/administration/templates/admin/task-status.html +++ b/mygpo/administration/templates/admin/task-status.html @@ -34,16 +34,6 @@

{% if ready %} -

{% trans "The following actions were recorded:" %} -

    - {% for action, count in actions %} -
  • {{ action }}: {{ count }}
  • - {% empty %} -
  • {% trans "none" %}
  • - {% endfor %} -
-

-

{% trans "Go to podcast" %} {% podcast_group_link podcast %}

{% else %}

{% trans "The operation is still ongoing..." %}

diff --git a/mygpo/administration/urls.py b/mygpo/administration/urls.py index d9fbd58f6..91677ecf8 100644 --- a/mygpo/administration/urls.py +++ b/mygpo/administration/urls.py @@ -16,11 +16,19 @@ views.MergeSelect.as_view(), name='admin-merge'), - url(r'^merge/verify$', + url(r'^merge/create$', + views.CreateMergeTask.as_view(), + name='admin-merge-create'), + + url(r'^merge/verify/(?P[^/]+)$', views.MergeVerify.as_view(), name='admin-merge-verify'), - url(r'^merge/process$', + url(r'^merge/update/(?P[^/]+)$', + views.UpdateMergeTask.as_view(), + name='admin-merge-update'), + + url(r'^merge/process/(?P[^/]+)$', views.MergeProcess.as_view(), name='admin-merge-process'), diff --git a/mygpo/administration/views.py b/mygpo/administration/views.py index 5c769c85e..6b2d4f842 100644 --- a/mygpo/administration/views.py +++ b/mygpo/administration/views.py @@ -15,15 +15,13 @@ from django.template import RequestContext from django.utils.translation import ugettext as _ from django.contrib.sites.requests import RequestSite -from django.views.generic import TemplateView +from django.views.generic import TemplateView, View from django.utils.decorators import method_decorator from django.conf import settings from django.contrib.auth import get_user_model from mygpo.podcasts.models import Podcast, Episode from mygpo.administration.auth import require_staff -from mygpo.administration.group import PodcastGrouper -from mygpo.maintenance.merge import PodcastMerger, IncorrectMergeException from mygpo.maintenance.models import MergeTask from mygpo.administration.clients import UserAgentStats, ClientStats from mygpo.administration.tasks import merge_podcasts @@ -92,29 +90,32 @@ class MergeSelect(AdminView): def get(self, request): queue_length = MergeTask.objects.count() + task = MergeTask.objects.first() - use_queue = bool(request.GET.get('queue', False)) - if use_queue: - queue = MergeTask.objects.first() - urls = [podcast.url for podcast in queue.podcasts] - queue_id = queue.id.hex - - else: - num = int(request.GET.get('podcasts', 2)) - urls = [''] * num - queue_id = '' + num = int(request.GET.get('podcasts', 2)) + urls = [''] * num + queue_id = '' return self.render_to_response({ 'queue_length': queue_length, 'urls': urls, - 'queue_id': queue_id, + 'task': task, }) -class MergeBase(AdminView): +class CreateMergeTask(AdminView): + + def post(self, request): + podcasts = self._get_podcasts(request) + + task = MergeTask.objects.create_from_podcasts(podcasts) + + return HttpResponseRedirect( + reverse('admin-merge-verify', args=[task.id]) + ) def _get_podcasts(self, request): - podcasts = [] + for n in count(): podcast_url = request.POST.get('feed%d' % n, None) if podcast_url is None: @@ -124,84 +125,68 @@ def _get_podcasts(self, request): continue p = Podcast.objects.get(urls__url=podcast_url) - podcasts.append(p) + yield p - return podcasts + +class MergeBase(AdminView): + pass class MergeVerify(MergeBase): template_name = 'admin/merge-grouping.html' - def post(self, request): - - try: - podcasts = self._get_podcasts(request) - - grouper = PodcastGrouper(podcasts) - - get_features = lambda episode: (episode.url, episode.title) - - num_groups = grouper.group(get_features) - - except InvalidPodcast as ip: - messages.error(request, - _('No podcast with URL {url}').format(url=str(ip))) - - podcasts = [] - num_groups = [] - + def get(self, request, task_id): + task = MergeTask.objects.get(id=uuid.UUID(task_id)) + podcasts = list(sorted(task.podcasts, key=lambda p: p.subscribers)) + groups = task.episode_groups() return self.render_to_response({ - 'queue_id': request.POST.get('queue_id', ''), - 'podcasts': podcasts, - 'groups': num_groups, - }) + 'podcasts': podcasts, + 'groups': groups, + 'task': task, + }) -class MergeProcess(MergeBase): +class UpdateMergeTask(View): RE_EPISODE = re.compile(r'episode_([0-9a-fA-F]{32})') - def post(self, request): + def post(self, request, task_id): + task = MergeTask.objects.get(id=uuid.UUID(task_id)) + podcasts = task.podcasts - try: - podcasts = self._get_podcasts(request) + features = self._features_from_post(request.POST) + get_features = lambda episode: features[episode.id] - except InvalidPodcast as ip: - messages.error(request, - _('No podcast with URL {url}').format(url=str(ip))) + # update groups within MergeTask + task.set_groups(get_features) + task.save() - grouper = PodcastGrouper(podcasts) + return HttpResponseRedirect( + reverse('admin-merge-verify', args=[task.id]) + ) + def _features_from_post(self, post): features = {} - for key, feature in request.POST.items(): + for key, feature in post.items(): m = self.RE_EPISODE.match(key) if m: episode_id = uuid.UUID(m.group(1)) features[episode_id] = feature - get_features = lambda episode: features[episode.id] - - num_groups = grouper.group(get_features) - queue_id = request.POST.get('queue_id', '') + return features - if 'renew' in request.POST: - return render(request, 'admin/merge-grouping.html', { - 'queue_id': queue_id, - 'podcasts': podcasts, - 'groups': num_groups, - }) +class MergeProcess(MergeBase): - elif 'merge' in request.POST: + def post(self, request, task_id): - podcast_ids = [p.get_id() for p in podcasts] - num_groups = list(num_groups) + task = MergeTask.objects.get(id=uuid.UUID(task_id)) - res = merge_podcasts.delay(podcast_ids, num_groups, queue_id) + res = merge_podcasts.delay(task.pk) - return HttpResponseRedirect(reverse('admin-merge-status', - args=[res.task_id])) + return HttpResponseRedirect(reverse('admin-merge-status', + args=[res.task_id])) class MergeStatus(AdminView): @@ -221,16 +206,11 @@ def get(self, request, task_id): # TODO: what to do with multiple frontends? cache.clear() - try: - actions, podcast = result.get() - - except IncorrectMergeException as ime: - messages.error(request, str(ime)) - return HttpResponseRedirect(reverse('admin-merge')) + podcast_id = result.get() + podcast = Podcast.objects.get(id=podcast_id) return self.render_to_response({ 'ready': True, - 'actions': actions.items(), 'podcast': podcast, }) diff --git a/mygpo/maintenance/migrations/0006_mergetask_groups.py b/mygpo/maintenance/migrations/0006_mergetask_groups.py new file mode 100644 index 000000000..1dcbbf9f4 --- /dev/null +++ b/mygpo/maintenance/migrations/0006_mergetask_groups.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.4 on 2017-08-13 09:01 +from __future__ import unicode_literals + +import django.contrib.postgres.fields.jsonb +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('maintenance', '0005_task'), + ] + + operations = [ + migrations.AddField( + model_name='mergetask', + name='groups', + field=django.contrib.postgres.fields.jsonb.JSONField(default=dict), + ), + ] diff --git a/mygpo/maintenance/models.py b/mygpo/maintenance/models.py index ddb70a3f4..bf6d1fdf1 100644 --- a/mygpo/maintenance/models.py +++ b/mygpo/maintenance/models.py @@ -1,12 +1,50 @@ -from django.db import models +import uuid +from datetime import datetime +from collections import defaultdict + +from django.db import models, transaction +from django.contrib.postgres.fields import JSONField from mygpo.core.models import UUIDModel from mygpo.podcasts.models import Podcast +from mygpo.maintenance.merge import merge_model_objects + +import logging +logger = logging.getLogger(__name__) + +DEFAULT_RELEASE = datetime(1970, 1, 1) +_SORT_KEY = lambda ep: ep.released or DEFAULT_RELEASE + + +class MergeTaskManager(models.Manager): + + @transaction.atomic + def create_from_podcasts(self, podcasts): + task = self.create(id=uuid.uuid4()) + + for podcast in podcasts: + mte = MergeTaskEntry.objects.create( + id=uuid.uuid4(), + podcast=podcast, + task=task, + ) + + get_features = lambda episode: (episode.url, episode.title) + + # update groups within MergeTask + task.set_groups(get_features) + task.save() + + return task class MergeTask(UUIDModel): """ A Group of podcasts that could be merged """ + objects = MergeTaskManager() + + groups = JSONField(default=dict) + @property def podcasts(self): """ Returns the podcasts of the task, sorted by subscribers """ @@ -16,6 +54,107 @@ def podcasts(self): return podcasts + def set_groups(self, get_features): + """ Groups the episodes by features extracted using ``get_features`` + + get_features is a callable that expects an episode as parameter, and + returns a value representing the extracted feature(s). + """ + + episodes = self.episodes + + episode_groups = defaultdict(list) + + for episode in episodes.values(): + features = get_features(episode) + episode_groups[features].append(episode.pk.hex) + + groups = sorted(episode_groups.values())#, key=_SORT_KEY) + self.groups = list(groups) + + @property + def episodes(self): + episodes = {} + for podcast in self.podcasts: + episodes.update(dict((e.id.hex, e) for e in podcast.episode_set.all())) + + return episodes + + def episode_groups(self): + """ Return a list of episode lists + + podcasts = [p1, p2, p3] + + Returns + groups = [ + [ep1 of p1, ep1 of p2, None], + [ep2 of p2, None, ep2 of p3], + ] + """ + + episodes = self.episodes + podcasts = self.podcasts + groups = [] + print(episodes) + print(podcasts) + print(self.groups) + + for episode_ids in self.groups: + line = [] + # go through the podcasts in order + for podcast in podcasts: + for episode_id in episode_ids: + episode = episodes.get(episode_id, None) + if episode is None: + continue + + if episode.podcast == podcast: + line.append(episode) + break + else: + # if nothing was found, add None + line.append(None) + + groups.append(line) + print(groups) + return groups + + def merge(self): + """ Carries out the actual merging """ + + logger.info('Start merging of podcasts: %r', self.podcasts) + + podcasts = self.podcasts + podcast1 = podcasts.pop(0) + logger.info('Merge target: %r', podcast1) + + self.merge_episodes() + merge_model_objects(podcast1, podcasts) + + return podcast1 + + def merge_episodes(self): + """ Merges the episodes according to the groups """ + + for episodes in self.episode_groups(): + print('Episodes') + print(episodes) + + if not episodes: + continue + + episode = episodes.pop(0) + + if not episode: + continue + + # the list can contain Nones + episodes = list(filter(None, episodes)) + + logger.info('Merging %d episodes', len(episodes)) + merge_model_objects(episode, episodes) + + class MergeTaskEntry(UUIDModel): """ An entry in a MergeTask """