change generating dummy data to use new models

i-dot-ai · Jan 2, 2025 · 8d05f63 · 8d05f63
1 parent 2235b79
commit 8d05f63
Show file tree

Hide file tree

Showing 8 changed files with 156 additions and 236 deletions.
diff --git a/consultation_analyser/consultations/dummy_data.py b/consultation_analyser/consultations/dummy_data.py
@@ -1,59 +1,123 @@
-import datetime
 import random
+from typing import Optional
 
-from consultation_analyser.factories import (
-    ConsultationBuilder,
-    FakeConsultationData,
+import yaml
+
+from consultation_analyser.consultations import models
+from consultation_analyser.factories2 import (
+    Answer2Factory,
+    Consultation2Factory,
+    ExecutionRunFactory,
+    FrameworkFactory,
+    Question2Factory,
+    QuestionPartFactory,
+    RespondentFactory,
+    SentimentMappingFactory,
+    Theme2Factory,
+    ThemeMappingFactory,
 )
 from consultation_analyser.hosting_environment import HostingEnvironment
 
 
-def create_dummy_data(responses=20, number_questions=10, **options):
-    if number_questions > 10:
-        raise RuntimeError("You can't have more than 10 questions")
+def create_dummy_consultation_from_yaml(
+    file_path: str = "./tests/examples/sample_questions.yml",
+    number_respondents: int = 10,
+    consultation: Optional[models.Consultation2] = None,
+) -> models.Consultation2:
+    """
+    Create dummy data from a YAML file: `number_respondents` Respondents plus Question,
+    QuestionPart, Answer, Theme, ThemeMapping, SentimentMapping, Framework and ExecutionRun
+    objects. Uses `consultation` if given, else creates one. Raises RuntimeError in production.
+    """
     if HostingEnvironment.is_production():
         raise RuntimeError("Dummy data generation should not be run in production")
 
-    # Timestamp to avoid duplicates - set these as default options
-    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
-    if "name" not in options:
-        options["name"] = f"Dummy consultation generated at {timestamp}"
-    if "slug" not in options:
-        options["slug"] = f"consultation-slug-{timestamp}"
-
-    consultation_builder = ConsultationBuilder(**options)
-    fake_consultation_data = FakeConsultationData()
-    all_questions = fake_consultation_data.all_questions()
-    questions_to_include = all_questions[:number_questions]
-
-    questions = [
-        consultation_builder.add_question(
-            text=q["text"],
-            slug=q["slug"],
-            multiple_choice_questions=[
-                (x["question_text"], x["options"]) for x in (q.get("multiple_choice") or [])
-            ],
-            has_free_text=q["has_free_text"],
+    if not consultation:
+        consultation = Consultation2Factory()
+    respondents = [RespondentFactory(consultation=consultation) for _ in range(number_respondents)]
+
+    with open(file_path, "r", encoding="utf-8") as file:
+        questions_data = yaml.safe_load(file)
+
+    # Save all questions, and corresponding question parts and answers
+    for question_data in questions_data:
+        question = Question2Factory(
+            text=question_data["question_text"],
+            order=question_data["order"],
+            consultation=consultation,
         )
-        for q in questions_to_include
-    ]
-
-    for i, r in enumerate(range(responses)):
-        for q in questions:
-            if q.has_free_text:
-                if random.randrange(1, 4) == 1:
-                    free_text_answer = ""
-                else:
-                    free_text_answer = fake_consultation_data.get_free_text_answer(q.slug)
-            else:
-                free_text_answer = None
-
-            consultation_builder.add_answer(q, free_text=free_text_answer)
-            consultation_builder.next_response()
-
-    # always assign a double multichoice selection to the last question
-    question_options = q.multiple_choice_options[0]
-    answers = (question_options["question_text"], question_options["options"][:2])
-    consultation_builder.add_answer(q, multiple_choice_answers=[answers])
-
-    return consultation_builder.consultation
+        parts = question_data["parts"]
+
+        # Each question part is considered separately
+        for part in parts:
+            question_part_type = part["type"]
+            question_part = QuestionPartFactory(
+                question=question,
+                text=part["text"],
+                type=question_part_type,
+                options=part.get("options", []),
+                order=part["order"],
+            )
+
+            # Create every theme listed for a free_text part; all are kept for mapping below
+            if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
+                # Simulate execution runs for each question to generate sentiment, themes, theme mapping
+                sentiment_run = ExecutionRunFactory(
+                    type=models.ExecutionRun.TaskType.SENTIMENT_ANALYSIS
+                )
+                theme_generation_run = ExecutionRunFactory(
+                    type=models.ExecutionRun.TaskType.THEME_GENERATION
+                )
+                framework = FrameworkFactory(
+                    execution_run=theme_generation_run, question_part=question_part
+                )
+                theme_mapping_run = ExecutionRunFactory(
+                    type=models.ExecutionRun.TaskType.THEME_MAPPING
+                )
+                theme_objects = [
+                    Theme2Factory(
+                        framework=framework,
+                        theme_name=theme["name"],
+                        theme_description=theme["description"],
+                    )
+                    for theme in part.get("themes", [])
+                ]
+
+            # Now populate the answers and corresponding themes etc. for these question parts
+            for respondent in respondents:
+                if question_part_type == models.QuestionPart.QuestionType.SINGLE_OPTION:
+                    chosen_options = random.choice(part["options"])
+                    text = ""
+                elif question_part_type == models.QuestionPart.QuestionType.MULTIPLE_OPTIONS:
+                    chosen_options = random.sample(
+                        part["options"], k=random.randint(1, len(part["options"]))
+                    )
+                    text = ""
+                elif question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
+                    text = random.choice(part.get("free_text_answers", [""]))
+                    chosen_options = []
+
+                answer = Answer2Factory(
+                    question_part=question_part,
+                    text=text,
+                    chosen_options=chosen_options,
+                    respondent=respondent,
+                )
+                # Map a random non-empty subset of themes (none if the part has no themes) and a
+                # sentiment to each free-text answer. The pipeline would do this in a different
+                # order - but here we are reading from file.
+                if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
+                    max_themes = len(theme_objects)
+                    themes_for_answer = random.sample(
+                        theme_objects, k=random.randint(min(1, max_themes), max_themes)
+                    )
+                    for theme in themes_for_answer:
+                        ThemeMappingFactory(
+                            answer=answer, theme=theme, execution_run=theme_mapping_run
+                        )
+                    sentiment = random.choice(models.SentimentMapping.PositionType.values)
+                    SentimentMappingFactory(
+                        answer=answer, execution_run=sentiment_run, position=sentiment
+                    )
+
+    return consultation
diff --git a/consultation_analyser/consultations/management/commands/generate_dummy_data.py b/consultation_analyser/consultations/management/commands/generate_dummy_data.py
@@ -1,6 +1,6 @@
 from django.core.management.base import BaseCommand
 
-from consultation_analyser.factories2 import create_dummy_consultation_from_yaml
+from consultation_analyser.consultations.dummy_data import create_dummy_consultation_from_yaml
 
 
 class Command(BaseCommand):

diff --git a/consultation_analyser/factories.py b/consultation_analyser/factories.py
@@ -2,7 +2,6 @@
 
 import factory
 import faker as _faker
-import yaml
 from django.utils import timezone
 
 from consultation_analyser.authentication import models as authentication_models
@@ -17,24 +16,6 @@ def generate_dummy_topic_keywords():
     return words.split(" ")
 
 
-class FakeConsultationData:
-    def __init__(self):
-        with open("./tests/examples/questions.yml", "r") as f:
-            questions = yaml.safe_load(f)
-            slugs = [q["slug"] for q in questions]
-            self.questions = dict(zip(slugs, questions))
-
-    def question(self):
-        return random.choice(list(self.questions.values()))
-
-    def get_free_text_answer(self, slug):
-        q = self.questions[slug]
-        return random.choice(q["answers"])
-
-    def all_questions(self):
-        return list(self.questions.values())
-
-
 class ConsultationBuilder:
     def __init__(self, **kwargs):
         consultation = ConsultationFactory(**kwargs)

diff --git a/consultation_analyser/factories2.py b/consultation_analyser/factories2.py
@@ -1,8 +1,6 @@
 import random
-from typing import Optional
 
 import factory
-import yaml
 from factory.django import DjangoModelFactory
 from faker import Faker
 
@@ -109,104 +107,3 @@ class Meta:
     answer = factory.SubFactory(Answer2Factory)
     execution_run = factory.SubFactory(ExecutionRunFactory)
     position = factory.Iterator(models.SentimentMapping.PositionType.values)
-
-
-def create_dummy_consultation_from_yaml(
-    file_path: str = "./tests/examples/sample_questions.yml",
-    number_respondents: int = 10,
-    consultation: Optional[models.Consultation2] = None,
-) -> Consultation2Factory:
-    """
-    Create consultation with question, question parts, answers and themes from yaml file.
-    Creates relevant objects: Consultation, Question, QuestionPart, Answer, Theme, ThemeMapping,
-    SentimentMapping, Framework, ExecutionRun.
-    """
-    if not consultation:
-        consultation = Consultation2Factory()
-    respondents = [RespondentFactory(consultation=consultation) for _ in range(number_respondents)]
-
-    with open(file_path, "r") as file:
-        questions_data = yaml.safe_load(file)
-
-    # Save all questions, and corresponding question parts and answers
-    for question_data in questions_data:
-        question = Question2Factory(
-            text=question_data["question_text"],
-            order=question_data["order"],
-            consultation=consultation,
-        )
-        parts = question_data["parts"]
-
-        # Each question part is considered separately
-        for part in parts:
-            question_part_type = part["type"]
-            question_part = QuestionPartFactory(
-                question=question,
-                text=part["text"],
-                type=question_part_type,
-                options=part.get("options", []),
-                order=part["order"],
-            )
-
-            # Get themes if free_text
-            if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
-                # Simulate execution runs for each question to generate sentiment, themes, theme mapping
-                sentiment_run = ExecutionRunFactory(
-                    type=models.ExecutionRun.TaskType.SENTIMENT_ANALYSIS
-                )
-                theme_generation_run = ExecutionRunFactory(
-                    type=models.ExecutionRun.TaskType.THEME_GENERATION
-                )
-                framework = FrameworkFactory(
-                    execution_run=theme_generation_run, question_part=question_part
-                )
-                theme_mapping_run = ExecutionRunFactory(
-                    type=models.ExecutionRun.TaskType.THEME_MAPPING
-                )
-                themes = part.get("themes", [])
-                for theme in themes:
-                    theme_objects = [
-                        Theme2Factory(
-                            framework=framework,
-                            theme_name=theme["name"],
-                            theme_description=theme["description"],
-                        )
-                    ]
-
-            # Now populate the answers and corresponding themes etc. for these question parts
-            for respondent in respondents:
-                if question_part_type == models.QuestionPart.QuestionType.SINGLE_OPTION:
-                    chosen_options = random.choice(part["options"])
-                    text = ""
-                elif question_part_type == models.QuestionPart.QuestionType.MULTIPLE_OPTIONS:
-                    chosen_options = random.sample(
-                        part["options"], k=random.randint(1, len(part["options"]))
-                    )
-                    text = ""
-                elif question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
-                    text = random.choice(part.get("free_text_answers", [""]))
-                    chosen_options = []
-
-                answer = Answer2Factory(
-                    question_part=question_part,
-                    text=text,
-                    chosen_options=chosen_options,
-                    respondent=respondent,
-                )
-                # Now map (multiple) themes and sentiment to each answer for free-text questions.
-                # This is in a different order to how it would work in pipeline - but this is as we
-                # are reading from file.
-                if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
-                    themes_for_answer = random.sample(
-                        theme_objects, k=random.randint(1, len(theme_objects))
-                    )
-                    for theme in themes_for_answer:
-                        ThemeMappingFactory(
-                            answer=answer, theme=theme, execution_run=theme_mapping_run
-                        )
-                    sentiment = random.choice(models.SentimentMapping.PositionType.values)
-                    SentimentMappingFactory(
-                        answer=answer, execution_run=sentiment_run, position=sentiment
-                    )
-
-    return consultation
diff --git a/consultation_analyser/support_console/views/consultations.py b/consultation_analyser/support_console/views/consultations.py
@@ -6,7 +6,7 @@
 
 from consultation_analyser.consultations import models
 from consultation_analyser.consultations.download_consultation import consultation_to_json
-from consultation_analyser.factories2 import create_dummy_consultation_from_yaml
+from consultation_analyser.consultations.dummy_data import create_dummy_consultation_from_yaml
 from consultation_analyser.hosting_environment import HostingEnvironment
 
 NO_SUMMARY_STR = "Unable to generate summary for this theme"

diff --git a/tests/commands/test_dummy_data.py b/tests/commands/test_dummy_data.py
@@ -13,16 +13,11 @@
 def test_name_parameter_sets_consultation_name(mock_is_local):
     call_command(
         "generate_dummy_data",
-        name="My special consultation",
         stdout=StringIO(),  # we'll ignore this
     )
 
-    assert models.Consultation.objects.count() == 1
-    assert models.Question.objects.count() == 10
-    assert models.Answer.objects.count() >= 100
-
-    assert models.Consultation.objects.first().name == "My special consultation"
-    assert models.Consultation.objects.first().slug == "my-special-consultation"
+    assert models.Consultation2.objects.count() == 1
+    assert models.Question2.objects.count() == 5
 
 
 @pytest.mark.django_db
@@ -34,6 +29,5 @@ def test_the_tool_will_only_run_in_dev(environment):
         ):
             call_command(
                 "generate_dummy_data",
-                name="My special consultation",
                 stdout=StringIO(),  # we'll ignore this
             )