|
9 | 9 | import boto3 |
10 | 10 | import celery |
11 | 11 | from django.conf import settings |
12 | | -from django.db.models import Q |
| 12 | +from django.db.models import Count, Q |
13 | 13 | from django.utils import timezone |
14 | 14 |
|
15 | 15 | from learning_resources.content_summarizer import ContentSummarizer |
|
35 | 35 | load_course_blocklist, |
36 | 36 | resource_unpublished_actions, |
37 | 37 | ) |
| 38 | +from learning_resources_search.constants import COURSE_TYPE |
38 | 39 | from learning_resources_search.exceptions import RetryError |
39 | 40 | from main.celery import app |
40 | 41 | from main.constants import ISOFORMAT |
|
43 | 44 | log = logging.getLogger(__name__) |
44 | 45 |
|
45 | 46 |
|
@app.task(bind=True)
def remove_duplicate_resources(self):
    """
    Remove duplicate unpublished resources and re-embed the published ones.

    Finds every ``readable_id`` shared by more than one LearningResource,
    deletes the unpublished rows for that ``readable_id``, and queues a
    ``generate_embeddings`` task (with ``overwrite=True``) for the published
    rows that remain. The current task is replaced by a chain of those
    embedding tasks; when there is nothing to re-embed the task simply ends.

    Returns:
        None. When embedding tasks were queued, ``self.replace`` hands
        execution over to the chain instead of returning normally.
    """
    # Imported locally, as in the original (presumably to avoid a circular
    # import between learning_resources and vector_search - TODO confirm).
    from vector_search.tasks import generate_embeddings

    duplicate_readable_ids = (
        LearningResource.objects.values("readable_id")
        .annotate(count_id=Count("id"))
        .filter(count_id__gt=1)
        .values_list("readable_id", flat=True)
    )
    embed_tasks = []
    # Materialise the ids up front: the loop deletes rows from the same table
    # the duplicate query aggregates over.
    for readable_id in list(duplicate_readable_ids):
        # Evaluate to a plain list *before* deleting: task arguments must be
        # serializable, and a lazy QuerySet would be evaluated too late.
        published_ids = list(
            LearningResource.objects.filter(
                readable_id=readable_id,
                published=True,
            ).values_list("id", flat=True)
        )
        # Every unpublished row sharing this readable_id is removed; only the
        # published rows are kept.
        # NOTE(review): if no published row exists, all copies are deleted -
        # confirm that is the intended behavior.
        LearningResource.objects.filter(
            readable_id=readable_id,
            published=False,
        ).delete()
        if published_ids:
            # NOTE(review): COURSE_TYPE is used for every resource regardless
            # of its actual resource type - verify against generate_embeddings.
            embed_tasks.append(
                generate_embeddings.si(published_ids, COURSE_TYPE, overwrite=True)
            )
    if not embed_tasks:
        # Nothing to re-embed; avoid replacing this task with an empty chain.
        return None
    return self.replace(celery.chain(*embed_tasks))
| 73 | + |
| 74 | + |
46 | 75 | @app.task |
47 | 76 | def update_next_start_date_and_prices(): |
48 | 77 | """Update expired next start dates and prices""" |
|
0 commit comments