Add status for TestRuns (#224)
* Add status for TestRuns

* switch to Integer choices
Ashesh3 authored Jul 31, 2023
1 parent 442eb8f commit b38d986
Showing 6 changed files with 159 additions and 87 deletions.
51 changes: 51 additions & 0 deletions ayushma/migrations/0034_remove_testrun_complete_testrun_status.py
@@ -0,0 +1,51 @@
# Generated by Django 4.2.1 on 2023-07-31 12:00

from django.db import migrations, models

from ayushma.models.enums import StatusChoices


def migrate_testrun_status(apps, schema_editor):
TestRun = apps.get_model("ayushma", "TestRun")
for testrun in TestRun.objects.all():
if testrun.complete:
testrun.status = StatusChoices.COMPLETED
else:
testrun.status = StatusChoices.CANCELED
testrun.save()


def reverse_testrun_status(apps, schema_editor):
TestRun = apps.get_model("ayushma", "TestRun")
for testrun in TestRun.objects.all():
if testrun.status == StatusChoices.COMPLETED:
testrun.complete = True
else:
testrun.complete = False
testrun.save()


class Migration(migrations.Migration):
dependencies = [
("ayushma", "0033_project_archived"),
]

    operations = [
        migrations.AddField(
            model_name="testrun",
            name="status",
            field=models.IntegerField(
                choices=[
                    (1, "Running"),
                    (2, "Completed"),
                    (3, "Canceled"),
                    (4, "Failed"),
                ],
                default=1,
            ),
        ),
        # Copy the old `complete` flag into `status` while both columns exist,
        # then drop `complete`. Without this RunPython step the data-migration
        # functions above are never invoked, and removing `complete` before
        # copying it would lose the run state.
        migrations.RunPython(migrate_testrun_status, reverse_testrun_status),
        migrations.RemoveField(
            model_name="testrun",
            name="complete",
        ),
    ]
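
A quick way to sanity-check the data migration is a Django shell session after migrating; the import paths below are assumed from this diff, and the assertion reflects that migrate_testrun_status only ever writes COMPLETED or CANCELED:

# Hypothetical spot-check in `python manage.py shell`:
from ayushma.models.testsuite import TestRun
from ayushma.models.enums import StatusChoices

# migrate_testrun_status maps complete=True -> COMPLETED and everything else
# -> CANCELED, so no pre-existing row should be RUNNING or FAILED.
assert not TestRun.objects.filter(
    status__in=[StatusChoices.RUNNING, StatusChoices.FAILED]
).exists()
print(TestRun.objects.filter(status=StatusChoices.COMPLETED).count(), "completed runs")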
7 changes: 7 additions & 0 deletions ayushma/models/enums.py
@@ -32,3 +32,10 @@ class ModelType(IntegerChoices):
    GPT_3_5_16K = 2
    GPT_4 = 3
    GPT_4_32K = 4


class StatusChoices(IntegerChoices):
    RUNNING = 1
    COMPLETED = 2
    CANCELED = 3
    FAILED = 4
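
For reference, these members follow standard Django IntegerChoices semantics: each member compares equal to its integer value, labels are derived from the member names, and .choices yields the (value, label) pairs embedded in the migration above. A minimal sketch:

from django.db.models import IntegerChoices

class StatusChoices(IntegerChoices):
    RUNNING = 1
    COMPLETED = 2
    CANCELED = 3
    FAILED = 4

assert StatusChoices.COMPLETED == 2                  # members are ints
assert StatusChoices.CANCELED.label == "Canceled"    # label derived from the name
assert StatusChoices.choices[0] == (1, "Running")    # what the migration embeds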
5 changes: 2 additions & 3 deletions ayushma/models/testsuite.py
@@ -1,7 +1,6 @@
from django.db import models
from django.db.models import (
    CASCADE,
-    BooleanField,
    CharField,
    FloatField,
    ForeignKey,
@@ -11,7 +10,7 @@

from ayushma.models import Project
from ayushma.models.document import Document
-from ayushma.models.enums import FeedBackRating
+from ayushma.models.enums import FeedBackRating, StatusChoices
from ayushma.models.users import User
from utils.models.base import BaseModel

@@ -32,7 +31,7 @@ class TestQuestion(BaseModel):
class TestRun(BaseModel):
    test_suite = ForeignKey(TestSuite, on_delete=CASCADE)
    project = ForeignKey(Project, on_delete=CASCADE)
-    complete = BooleanField(default=False)
+    status = IntegerField(choices=StatusChoices.choices, default=StatusChoices.RUNNING)


class TestResult(BaseModel):
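
With status in place, run state can be queried and displayed like any Django choices field. A hedged usage sketch (import paths and the external_id attribute are assumed from this diff):

from ayushma.models.testsuite import TestRun
from ayushma.models.enums import StatusChoices

active_runs = TestRun.objects.filter(status=StatusChoices.RUNNING)
for run in active_runs:
    # get_status_display() is auto-generated by Django for fields with choices
    print(run.external_id, run.get_status_display())  # e.g. "Running"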
4 changes: 2 additions & 2 deletions ayushma/serializers/testsuite.py
@@ -59,6 +59,7 @@ class Meta:
class TestResultSerializer(serializers.ModelSerializer):
    feedback = FeedbackSerializer(source="feedback_set", many=True, read_only=True)
    references = DocumentSerializer(many=True, read_only=True)
+
    class Meta:
        model = TestResult
        fields = "__all__"
@@ -87,14 +88,13 @@ class Meta:
            "project_object",
            "created_at",
            "modified_at",
-            "complete",
+            "status",
            "test_results",
        )
        read_only_fields = (
            "external_id",
            "created_at",
            "modified_at",
            "project",
-            "complete",
            "test_results",
        )
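
Two things worth noting in this serializer change: status is serialized as the raw integer (the IntegerChoices value, not its label), and it is absent from read_only_fields, presumably so a client can cancel a run by writing the CANCELED value back. A hedged client-side sketch (the endpoint URL and payload shape are assumptions, not part of this diff):

import requests  # hypothetical external client

STATUS = {1: "running", 2: "completed", 3: "canceled", 4: "failed"}

run = requests.get("https://api.example.com/api/testruns/<external_id>/").json()
print(STATUS.get(run["status"], "unknown"))

# Cancellation: PATCH the writable status field with the CANCELED value (3);
# the Celery task polls for this between questions.
requests.patch("https://api.example.com/api/testruns/<external_id>/", json={"status": 3})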
178 changes: 97 additions & 81 deletions ayushma/tasks/testrun.py
@@ -2,6 +2,7 @@

import openai
from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded
from django.conf import settings
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

@@ -11,89 +12,104 @@
from ayushma.models.enums import StatusChoices
from ayushma.utils.openaiapi import converse, cosine_similarity, get_embedding


@shared_task(bind=True, soft_time_limit=21600)  # 6 hours in seconds
def mark_test_run_as_completed(self, test_run_id):  # bind=True passes the task instance as the first argument
    try:
        sleep(5)

        if not settings.OPENAI_API_KEY:
            print("OpenAI API key not found. Skipping test run.")
            return

        test_run = TestRun.objects.get(id=test_run_id)
        test_suite = test_run.test_suite
        test_questions = test_suite.testquestion_set.all()

        temperature = test_suite.temperature
        topk = test_suite.topk

        chat = Chat()
        chat.title = "Test Run: " + test_run.created_at.strftime("%Y-%m-%d %H:%M:%S")
        chat.project = test_run.project
        chat.save()

        for test_question in test_questions:
            sleep(30)  # Wait 30 seconds to give the previous test question time to complete

            # Re-fetch so a cancellation issued through the API is picked up
            # mid-run. The field is an IntegerField, so compare against the
            # enum member, not the string "canceled".
            if TestRun.objects.get(id=test_run_id).status == StatusChoices.CANCELED:
                print("Test run canceled")
                return

            test_result = TestResult()
            test_result.test_run = test_run
            test_result.test_question = test_question
            test_result.question = test_question.question
            test_result.human_answer = test_question.human_answer

            ai_response = None
            try:
                english_text = test_question.question
                translated_text = test_question.question

                if test_question.language != "en":
                    english_text = translate_text("en-IN", english_text)
                    translated_text = translate_text(
                        test_question.language + "-IN", english_text
                    )

                ai_response = next(
                    converse(
                        english_text=english_text,
                        local_translated_text=translated_text,
                        openai_key=settings.OPENAI_API_KEY,
                        chat=chat,
                        match_number=topk,
                        stats=dict(),
                        temperature=temperature,
                        user_language=test_question.language + "-IN",
                        stream=False,
                        generate_audio=False,
                    )
                )

                # Calculate cosine similarity
                openai.api_key = settings.OPENAI_API_KEY
                ai_response_embedding = get_embedding(ai_response.message)
                human_answer_embedding = get_embedding(test_question.human_answer)
                cosine_sim = cosine_similarity(
                    ai_response_embedding, human_answer_embedding
                )

                # Calculate BLEU score ( https://www.nltk.org/api/nltk.translate.bleu_score.html#nltk.translate.bleu_score.SmoothingFunction.__init__ )
                reference_tokens = test_question.human_answer.split()
                candidate_tokens = ai_response.message.split()

                smoothie = SmoothingFunction().method4
                bleu_score = sentence_bleu(
                    [reference_tokens], candidate_tokens, smoothing_function=smoothie
                )

                test_result.answer = ai_response.message
                test_result.cosine_sim = cosine_sim
                test_result.bleu_score = round(bleu_score, 4)

            except Exception as e:
                print("Error while running test question: ", e)
                test_result.answer = ""
                test_result.cosine_sim = 0
                test_result.bleu_score = 0

            finally:
                test_result.save()
                if ai_response is not None:  # converse() may have failed before producing a response
                    test_result.references.set(ai_response.reference_documents.all())
                    test_result.save()

        test_run.status = StatusChoices.COMPLETED
        test_run.save()

    except SoftTimeLimitExceeded:
        print("SoftTimeLimitExceeded")
        test_run.status = StatusChoices.FAILED
        test_run.save()
        return
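
Dispatching follows Celery's standard API; a hedged sketch (the suite/project lookup is illustrative, and the new status field defaults to RUNNING on creation):

from ayushma.models import Project
from ayushma.models.testsuite import TestRun, TestSuite
from ayushma.tasks.testrun import mark_test_run_as_completed

suite = TestSuite.objects.first()    # illustrative: any existing suite/project
project = Project.objects.first()
test_run = TestRun.objects.create(test_suite=suite, project=project)  # status defaults to RUNNING
mark_test_run_as_completed.delay(test_run.id)  # returns at once; Celery enforces the 6 h soft limit

# If the soft limit expires, SoftTimeLimitExceeded is raised inside the task and
# the run is marked FAILED; setting status to CANCELED from outside stops the
# loop at the next question boundary.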
1 change: 0 additions & 1 deletion ayushma/utils/langchain.py
@@ -50,7 +50,6 @@ def __init__(
            "temperature": temperature,  # 0 means more deterministic output, 1 means more random output
            "openai_api_key": openai_api_key,
            "model_name": get_model_name(model),
-            "max_retries": 30,
        }
        if stream:
            llm_args["streaming"] = True
