Add status for TestRuns (#224)
* Add status for TestRuns

* switch to Integer choices
Ashesh3 authored Jul 31, 2023
1 parent 442eb8f commit b38d986
Showing 6 changed files with 159 additions and 87 deletions.
51 changes: 51 additions & 0 deletions ayushma/migrations/0034_remove_testrun_complete_testrun_status.py
@@ -0,0 +1,51 @@
# Generated by Django 4.2.1 on 2023-07-31 12:00

from django.db import migrations, models

from ayushma.models.enums import StatusChoices


def migrate_testrun_status(apps, schema_editor):
TestRun = apps.get_model("ayushma", "TestRun")
for testrun in TestRun.objects.all():
if testrun.complete:
testrun.status = StatusChoices.COMPLETED
else:
testrun.status = StatusChoices.CANCELED
testrun.save()


def reverse_testrun_status(apps, schema_editor):
TestRun = apps.get_model("ayushma", "TestRun")
for testrun in TestRun.objects.all():
if testrun.status == StatusChoices.COMPLETED:
testrun.complete = True
else:
testrun.complete = False
testrun.save()


class Migration(migrations.Migration):
dependencies = [
("ayushma", "0033_project_archived"),
]

    operations = [
        migrations.AddField(
            model_name="testrun",
            name="status",
            field=models.IntegerField(
                choices=[
                    (1, "Running"),
                    (2, "Completed"),
                    (3, "Canceled"),
                    (4, "Failed"),
                ],
                default=1,
            ),
        ),
        # Copy the old `complete` flag into `status` while both columns exist,
        # then drop `complete`. Without this RunPython step the data-migration
        # functions above are never invoked, and removing `complete` before
        # copying it would lose the run state.
        migrations.RunPython(migrate_testrun_status, reverse_testrun_status),
        migrations.RemoveField(
            model_name="testrun",
            name="complete",
        ),
    ]
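
A quick way to sanity-check the data migration is a Django shell session after migrating; the import paths below are assumed from this diff, and the assertion reflects that migrate_testrun_status only ever writes COMPLETED or CANCELED:

# Hypothetical spot-check in `python manage.py shell`:
from ayushma.models.testsuite import TestRun
from ayushma.models.enums import StatusChoices

# migrate_testrun_status maps complete=True -> COMPLETED and everything else
# -> CANCELED, so no pre-existing row should be RUNNING or FAILED.
assert not TestRun.objects.filter(
    status__in=[StatusChoices.RUNNING, StatusChoices.FAILED]
).exists()
print(TestRun.objects.filter(status=StatusChoices.COMPLETED).count(), "completed runs")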
7 changes: 7 additions & 0 deletions ayushma/models/enums.py
@@ -32,3 +32,10 @@ class ModelType(IntegerChoices):
    GPT_3_5_16K = 2
    GPT_4 = 3
    GPT_4_32K = 4


class StatusChoices(IntegerChoices):
    RUNNING = 1
    COMPLETED = 2
    CANCELED = 3
    FAILED = 4
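
For reference, these members follow standard Django IntegerChoices semantics: each member compares equal to its integer value, labels are derived from the member names, and .choices yields the (value, label) pairs embedded in the migration above. A minimal sketch:

from django.db.models import IntegerChoices

class StatusChoices(IntegerChoices):
    RUNNING = 1
    COMPLETED = 2
    CANCELED = 3
    FAILED = 4

assert StatusChoices.COMPLETED == 2                  # members are ints
assert StatusChoices.CANCELED.label == "Canceled"    # label derived from the name
assert StatusChoices.choices[0] == (1, "Running")    # what the migration embeds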
5 changes: 2 additions & 3 deletions ayushma/models/testsuite.py
@@ -1,7 +1,6 @@
from django.db import models
from django.db.models import (
    CASCADE,
-    BooleanField,
    CharField,
    FloatField,
    ForeignKey,
@@ -11,7 +10,7 @@

from ayushma.models import Project
from ayushma.models.document import Document
-from ayushma.models.enums import FeedBackRating
+from ayushma.models.enums import FeedBackRating, StatusChoices
from ayushma.models.users import User
from utils.models.base import BaseModel

@@ -32,7 +31,7 @@ class TestQuestion(BaseModel):
class TestRun(BaseModel):
    test_suite = ForeignKey(TestSuite, on_delete=CASCADE)
    project = ForeignKey(Project, on_delete=CASCADE)
-    complete = BooleanField(default=False)
+    status = IntegerField(choices=StatusChoices.choices, default=StatusChoices.RUNNING)


class TestResult(BaseModel):
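
With status in place, run state can be queried and displayed like any Django choices field. A hedged usage sketch (import paths and the external_id attribute are assumed from this diff):

from ayushma.models.testsuite import TestRun
from ayushma.models.enums import StatusChoices

active_runs = TestRun.objects.filter(status=StatusChoices.RUNNING)
for run in active_runs:
    # get_status_display() is auto-generated by Django for fields with choices
    print(run.external_id, run.get_status_display())  # e.g. "Running"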
4 changes: 2 additions & 2 deletions ayushma/serializers/testsuite.py
@@ -59,6 +59,7 @@ class Meta:
class TestResultSerializer(serializers.ModelSerializer):
    feedback = FeedbackSerializer(source="feedback_set", many=True, read_only=True)
    references = DocumentSerializer(many=True, read_only=True)
+
    class Meta:
        model = TestResult
        fields = "__all__"
@@ -87,14 +88,13 @@ class Meta:
            "project_object",
            "created_at",
            "modified_at",
-            "complete",
+            "status",
            "test_results",
        )
        read_only_fields = (
            "external_id",
            "created_at",
            "modified_at",
            "project",
-            "complete",
            "test_results",
        )
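
Two things worth noting in this serializer change: status is serialized as the raw integer (the IntegerChoices value, not its label), and it is absent from read_only_fields, presumably so a client can cancel a run by writing the CANCELED value back. A hedged client-side sketch (the endpoint URL and payload shape are assumptions, not part of this diff):

import requests  # hypothetical external client

STATUS = {1: "running", 2: "completed", 3: "canceled", 4: "failed"}

run = requests.get("https://api.example.com/api/testruns/<external_id>/").json()
print(STATUS.get(run["status"], "unknown"))

# Cancellation: PATCH the writable status field with the CANCELED value (3);
# the Celery task polls for this between questions.
requests.patch("https://api.example.com/api/testruns/<external_id>/", json={"status": 3})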
178 changes: 97 additions & 81 deletions ayushma/tasks/testrun.py
@@ -2,6 +2,7 @@

import openai
from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded
from django.conf import settings
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

@@ -11,89 +12,104 @@
from ayushma.models.enums import StatusChoices
from ayushma.utils.openaiapi import converse, cosine_similarity, get_embedding


@shared_task(bind=True, soft_time_limit=21600)  # 6 hours in seconds
def mark_test_run_as_completed(self, test_run_id):  # bind=True passes the task instance as the first argument
    try:
        sleep(5)

        if not settings.OPENAI_API_KEY:
            print("OpenAI API key not found. Skipping test run.")
            return

        test_run = TestRun.objects.get(id=test_run_id)
        test_suite = test_run.test_suite
        test_questions = test_suite.testquestion_set.all()

        temperature = test_suite.temperature
        topk = test_suite.topk

        chat = Chat()
        chat.title = "Test Run: " + test_run.created_at.strftime("%Y-%m-%d %H:%M:%S")
        chat.project = test_run.project
        chat.save()

        for test_question in test_questions:
            sleep(30)  # Wait 30 seconds to give the previous test question time to complete

            # Re-fetch so a cancellation issued through the API is picked up
            # mid-run. The field is an IntegerField, so compare against the
            # enum member, not the string "canceled".
            if TestRun.objects.get(id=test_run_id).status == StatusChoices.CANCELED:
                print("Test run canceled")
                return

            test_result = TestResult()
            test_result.test_run = test_run
            test_result.test_question = test_question
            test_result.question = test_question.question
            test_result.human_answer = test_question.human_answer

            ai_response = None
            try:
                english_text = test_question.question
                translated_text = test_question.question

                if test_question.language != "en":
                    english_text = translate_text("en-IN", english_text)
                    translated_text = translate_text(
                        test_question.language + "-IN", english_text
                    )

                ai_response = next(
                    converse(
                        english_text=english_text,
                        local_translated_text=translated_text,
                        openai_key=settings.OPENAI_API_KEY,
                        chat=chat,
                        match_number=topk,
                        stats=dict(),
                        temperature=temperature,
                        user_language=test_question.language + "-IN",
                        stream=False,
                        generate_audio=False,
                    )
                )

                # Calculate cosine similarity
                openai.api_key = settings.OPENAI_API_KEY
                ai_response_embedding = get_embedding(ai_response.message)
                human_answer_embedding = get_embedding(test_question.human_answer)
                cosine_sim = cosine_similarity(
                    ai_response_embedding, human_answer_embedding
                )

                # Calculate BLEU score ( https://www.nltk.org/api/nltk.translate.bleu_score.html#nltk.translate.bleu_score.SmoothingFunction.__init__ )
                reference_tokens = test_question.human_answer.split()
                candidate_tokens = ai_response.message.split()

                smoothie = SmoothingFunction().method4
                bleu_score = sentence_bleu(
                    [reference_tokens], candidate_tokens, smoothing_function=smoothie
                )

                test_result.answer = ai_response.message
                test_result.cosine_sim = cosine_sim
                test_result.bleu_score = round(bleu_score, 4)

            except Exception as e:
                print("Error while running test question: ", e)
                test_result.answer = ""
                test_result.cosine_sim = 0
                test_result.bleu_score = 0

            finally:
                test_result.save()
                if ai_response is not None:  # converse() may have failed before producing a response
                    test_result.references.set(ai_response.reference_documents.all())
                    test_result.save()

        test_run.status = StatusChoices.COMPLETED
        test_run.save()

    except SoftTimeLimitExceeded:
        print("SoftTimeLimitExceeded")
        test_run.status = StatusChoices.FAILED
        test_run.save()
        return
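
Dispatching follows Celery's standard API; a hedged sketch (the suite/project lookup is illustrative, and the new status field defaults to RUNNING on creation):

from ayushma.models import Project
from ayushma.models.testsuite import TestRun, TestSuite
from ayushma.tasks.testrun import mark_test_run_as_completed

suite = TestSuite.objects.first()    # illustrative: any existing suite/project
project = Project.objects.first()
test_run = TestRun.objects.create(test_suite=suite, project=project)  # status defaults to RUNNING
mark_test_run_as_completed.delay(test_run.id)  # returns at once; Celery enforces the 6 h soft limit

# If the soft limit expires, SoftTimeLimitExceeded is raised inside the task and
# the run is marked FAILED; setting status to CANCELED from outside stops the
# loop at the next question boundary.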
1 change: 0 additions & 1 deletion ayushma/utils/langchain.py
@@ -50,7 +50,6 @@ def __init__(
            "temperature": temperature,  # 0 means more deterministic output, 1 means more random output
            "openai_api_key": openai_api_key,
            "model_name": get_model_name(model),
-            "max_retries": 30,
        }
        if stream:
            llm_args["streaming"] = True
