Skip to content

Commit

Permalink
converted doc upload to celery task
Browse files — browse the repository at this point in the history
  • Loading branch information
shivankacker committed Aug 9, 2023
1 parent c3625be commit 02950da
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 24 deletions.
17 changes: 17 additions & 0 deletions ayushma/migrations/0036_document_uploading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.1 on 2023-08-09 16:19

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the boolean ``uploading`` field (default ``False``) to ``document``."""

    # Must run after the previous ayushma migration.
    dependencies = [("ayushma", "0035_auto_20230731_1407")]

    operations = [
        migrations.AddField(
            model_name="document",
            name="uploading",
            field=models.BooleanField(default=False),
        )
    ]
1 change: 1 addition & 0 deletions ayushma/models/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Document(BaseModel):
file = models.FileField(null=True, blank=True)
text_content = models.TextField(null=True, blank=True)
project = models.ForeignKey(Project, on_delete=models.PROTECT)
uploading = models.BooleanField(default=False)

def __str__(self) -> str:
    """Return the display label: the document title, then its project's title."""
    own_title = self.title
    project_title = self.project.title
    return f"{own_title} in {project_title}"
3 changes: 2 additions & 1 deletion ayushma/serializers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ class Meta:
"document_type",
"file",
"text_content",
"uploading",
)
read_only_fields = ("external_id", "created_at", "modified_at")
read_only_fields = ("external_id", "created_at", "modified_at", "uploading")
write_only_fields = ("file",)

def validate(self, data):
Expand Down
50 changes: 50 additions & 0 deletions ayushma/tasks/upsertdoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from time import sleep

from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded
from django.conf import settings

from ayushma.models.document import Document
from ayushma.models.enums import DocumentType
from ayushma.utils.upsert import upsert


@shared_task(bind=True, soft_time_limit=21600)  # 6 hours in seconds
def upsert_doc(self, document_id: str, document_url: str = None):
    """Index one document into the vector store as a background Celery task.

    The document's ``uploading`` flag is set to ``True`` while ``upsert`` is
    running and reset to ``False`` once it finishes or fails, so the flag
    reflects work that is actually in progress.

    Args:
        document_id: ``external_id`` of the ``Document`` to index.
        document_url: URL of the stored file; only used when the document
            type is ``DocumentType.FILE``.

    Raises:
        Document.DoesNotExist: if no document has the given ``external_id``.
        Exception: if the document type is not FILE, URL or TEXT, or if
            ``upsert`` itself fails (re-raised after the flag is reset).
    """
    # Bound up front so the exception handlers can tell whether the lookup
    # happened; the soft time limit can fire before the document is loaded.
    document = None
    try:
        # NOTE(review): presumably gives the creating request time to commit
        # the new row before it is read here -- confirm.
        sleep(5)
        if not settings.OPENAI_API_KEY:
            print("OpenAI API key not found. Skipping test run.")
            return

        document = Document.objects.get(external_id=document_id)

        # Pick the single source argument that matches the document type.
        if document.document_type == DocumentType.FILE:
            source = {"s3_url": document_url}
        elif document.document_type == DocumentType.URL:
            source = {"url": document.text_content}
        elif document.document_type == DocumentType.TEXT:
            source = {"text": document.text_content}
        else:
            raise Exception("Invalid document type.")

        # Mark the document as in progress before the long-running call.
        document.uploading = True
        document.save()

        upsert(
            external_id=document.project.external_id,
            document_id=document.external_id,
            **source,
        )

        document.uploading = False
        document.save()

    except SoftTimeLimitExceeded:
        print("SoftTimeLimitExceeded")
        # Nothing to clean up if the limit fired before the lookup finished.
        if document is not None:
            document.uploading = False
            document.save()
            document.delete()
        return
    except Exception:
        # Do not leave the document stuck in the "uploading" state.
        if document is not None and document.uploading:
            document.uploading = False
            document.save()
        raise
2 changes: 1 addition & 1 deletion ayushma/utils/upsert.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def upsert(
] # prep metadata and upsert batch
to_upsert = zip(ids_batch, embeds, meta) # zip together
pinecone_index.upsert(
vectors=list(to_upsert), namespace=external_id
vectors=list(to_upsert), namespace=str(external_id)
) # upsert to Pinecone

print("Finished upserting to Pinecone index")
36 changes: 14 additions & 22 deletions ayushma/views/document.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import json
from ast import Delete

from django.conf import settings
from drf_spectacular.utils import extend_schema, extend_schema_view
from rest_framework.exceptions import ValidationError
from rest_framework.mixins import CreateModelMixin, ListModelMixin, RetrieveModelMixin
from rest_framework.mixins import (
CreateModelMixin,
DestroyModelMixin,
ListModelMixin,
RetrieveModelMixin,
)
from rest_framework.parsers import MultiPartParser
from rest_framework.permissions import IsAdminUser
from rest_framework.response import Response

from ayushma.models import Document, DocumentType, Project
from ayushma.serializers.document import DocumentSerializer, DocumentUpdateSerializer
from ayushma.tasks.upsertdoc import upsert_doc
from ayushma.utils.upsert import upsert
from utils.views.base import BaseModelViewSet
from utils.views.mixins import PartialUpdateModelMixin
Expand All @@ -20,6 +27,7 @@ class DocumentViewSet(
PartialUpdateModelMixin,
CreateModelMixin,
RetrieveModelMixin,
DestroyModelMixin,
ListModelMixin,
):
queryset = Document.objects.all()
Expand Down Expand Up @@ -51,29 +59,13 @@ def perform_create(self, serializer):

document = serializer.save(project=project)

doc_url = None
try:
if document.document_type == DocumentType.FILE:
upsert(
external_id=external_id,
s3_url=self.request.build_absolute_uri(document.file.url),
document_id=document.external_id,
)
elif document.document_type == DocumentType.URL:
upsert(
external_id=external_id,
url=document.text_content,
document_id=document.external_id,
)
elif document.document_type == DocumentType.TEXT:
upsert(
external_id=external_id,
text=document.text_content,
document_id=document.external_id,
)
else:
raise Exception("Invalid document type.")
doc_url = self.request.build_absolute_uri(document.file.url)
except Exception as e:
raise ValidationError({"non_field_errors": str(e)})
pass

upsert_doc.delay(document.external_id, doc_url)

def perform_destroy(self, instance):
# delete namespace from vectorDB
Expand Down

0 comments on commit 02950da

Please sign in to comment.