Skip to content

Commit

Permalink
change generating dummy data to use new models
Browse files Browse the repository at this point in the history
  • Loading branch information
nmenezes0 committed Jan 2, 2025
1 parent 2235b79 commit 8d05f63
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 236 deletions.
162 changes: 113 additions & 49 deletions consultation_analyser/consultations/dummy_data.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,123 @@
import datetime
import random
from typing import Optional

from consultation_analyser.factories import (
ConsultationBuilder,
FakeConsultationData,
import yaml

from consultation_analyser.consultations import models
from consultation_analyser.factories2 import (
Answer2Factory,
Consultation2Factory,
ExecutionRunFactory,
FrameworkFactory,
Question2Factory,
QuestionPartFactory,
RespondentFactory,
SentimentMappingFactory,
Theme2Factory,
ThemeMappingFactory,
)
from consultation_analyser.hosting_environment import HostingEnvironment


def create_dummy_data(responses=20, number_questions=10, **options):
if number_questions > 10:
raise RuntimeError("You can't have more than 10 questions")
def create_dummy_consultation_from_yaml(
file_path: str = "./tests/examples/sample_questions.yml",
number_respondents: int = 10,
consultation: Optional[models.Consultation2] = None,
) -> Consultation2Factory:
"""
Create consultation with question, question parts, answers and themes from yaml file.
Creates relevant objects: Consultation, Question, QuestionPart, Answer, Theme, ThemeMapping,
SentimentMapping, Framework, ExecutionRun.
"""
if HostingEnvironment.is_production():
raise RuntimeError("Dummy data generation should not be run in production")

# Timestamp to avoid duplicates - set these as default options
timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
if "name" not in options:
options["name"] = f"Dummy consultation generated at {timestamp}"
if "slug" not in options:
options["slug"] = f"consultation-slug-{timestamp}"

consultation_builder = ConsultationBuilder(**options)
fake_consultation_data = FakeConsultationData()
all_questions = fake_consultation_data.all_questions()
questions_to_include = all_questions[:number_questions]

questions = [
consultation_builder.add_question(
text=q["text"],
slug=q["slug"],
multiple_choice_questions=[
(x["question_text"], x["options"]) for x in (q.get("multiple_choice") or [])
],
has_free_text=q["has_free_text"],
if not consultation:
consultation = Consultation2Factory()
respondents = [RespondentFactory(consultation=consultation) for _ in range(number_respondents)]

with open(file_path, "r") as file:
questions_data = yaml.safe_load(file)

# Save all questions, and corresponding question parts and answers
for question_data in questions_data:
question = Question2Factory(
text=question_data["question_text"],
order=question_data["order"],
consultation=consultation,
)
for q in questions_to_include
]

for i, r in enumerate(range(responses)):
for q in questions:
if q.has_free_text:
if random.randrange(1, 4) == 1:
free_text_answer = ""
else:
free_text_answer = fake_consultation_data.get_free_text_answer(q.slug)
else:
free_text_answer = None

consultation_builder.add_answer(q, free_text=free_text_answer)
consultation_builder.next_response()

# always assign a double multichoice selection to the last question
question_options = q.multiple_choice_options[0]
answers = (question_options["question_text"], question_options["options"][:2])
consultation_builder.add_answer(q, multiple_choice_answers=[answers])

return consultation_builder.consultation
parts = question_data["parts"]

# Each question part is considered separately
for part in parts:
question_part_type = part["type"]
question_part = QuestionPartFactory(
question=question,
text=part["text"],
type=question_part_type,
options=part.get("options", []),
order=part["order"],
)

# Get themes if free_text
if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
# Simulate execution runs for each question to generate sentiment, themes, theme mapping
sentiment_run = ExecutionRunFactory(
type=models.ExecutionRun.TaskType.SENTIMENT_ANALYSIS
)
theme_generation_run = ExecutionRunFactory(
type=models.ExecutionRun.TaskType.THEME_GENERATION
)
framework = FrameworkFactory(
execution_run=theme_generation_run, question_part=question_part
)
theme_mapping_run = ExecutionRunFactory(
type=models.ExecutionRun.TaskType.THEME_MAPPING
)
themes = part.get("themes", [])
for theme in themes:
theme_objects = [
Theme2Factory(
framework=framework,
theme_name=theme["name"],
theme_description=theme["description"],
)
]

# Now populate the answers and corresponding themes etc. for these question parts
for respondent in respondents:
if question_part_type == models.QuestionPart.QuestionType.SINGLE_OPTION:
chosen_options = random.choice(part["options"])
text = ""
elif question_part_type == models.QuestionPart.QuestionType.MULTIPLE_OPTIONS:
chosen_options = random.sample(
part["options"], k=random.randint(1, len(part["options"]))
)
text = ""
elif question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
text = random.choice(part.get("free_text_answers", [""]))
chosen_options = []

answer = Answer2Factory(
question_part=question_part,
text=text,
chosen_options=chosen_options,
respondent=respondent,
)
# Now map (multiple) themes and sentiment to each answer for free-text questions.
# This is a different order from how the pipeline would do it, because here we are
# reading everything from a file.
if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT:
themes_for_answer = random.sample(
theme_objects, k=random.randint(1, len(theme_objects))
)
for theme in themes_for_answer:
ThemeMappingFactory(
answer=answer, theme=theme, execution_run=theme_mapping_run
)
sentiment = random.choice(models.SentimentMapping.PositionType.values)
SentimentMappingFactory(
answer=answer, execution_run=sentiment_run, position=sentiment
)

return consultation
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from django.core.management.base import BaseCommand

from consultation_analyser.factories2 import create_dummy_consultation_from_yaml
from consultation_analyser.consultations.dummy_data import create_dummy_consultation_from_yaml


class Command(BaseCommand):
Expand Down
19 changes: 0 additions & 19 deletions consultation_analyser/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import factory
import faker as _faker
import yaml
from django.utils import timezone

from consultation_analyser.authentication import models as authentication_models
Expand All @@ -17,24 +16,6 @@ def generate_dummy_topic_keywords():
return words.split(" ")


class FakeConsultationData:
    """
    Example consultation questions loaded from a YAML file, indexed by slug.

    The file is expected to contain a list of questions, each a mapping with at
    least a "slug" key. `get_free_text_answer` additionally reads an "answers"
    list from the question it looks up.
    """

    def __init__(self, path="./tests/examples/questions.yml"):
        """
        Load the questions file.

        Args:
            path: location of the YAML file. Defaults to the example questions
                file that was previously hard-coded here.
        """
        # YAML is read as UTF-8 explicitly rather than relying on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            questions = yaml.safe_load(f)
        # Keyed by slug so that answers can be looked up per question.
        self.questions = {q["slug"]: q for q in questions}

    def question(self):
        """Return one of the loaded questions, chosen at random."""
        return random.choice(list(self.questions.values()))

    def get_free_text_answer(self, slug):
        """
        Return a random entry from the "answers" list of the question with this slug.

        Raises:
            KeyError: if no question has this slug, or the question has no "answers".
        """
        q = self.questions[slug]
        return random.choice(q["answers"])

    def all_questions(self):
        """Return every loaded question (one per slug) as a list."""
        return list(self.questions.values())


class ConsultationBuilder:
def __init__(self, **kwargs):
consultation = ConsultationFactory(**kwargs)
Expand Down
103 changes: 0 additions & 103 deletions consultation_analyser/factories2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import random
from typing import Optional

import factory
import yaml
from factory.django import DjangoModelFactory
from faker import Faker

Expand Down Expand Up @@ -109,104 +107,3 @@ class Meta:
answer = factory.SubFactory(Answer2Factory)
execution_run = factory.SubFactory(ExecutionRunFactory)
position = factory.Iterator(models.SentimentMapping.PositionType.values)


def create_dummy_consultation_from_yaml(
    file_path: str = "./tests/examples/sample_questions.yml",
    number_respondents: int = 10,
    consultation: Optional[models.Consultation2] = None,
) -> models.Consultation2:
    """
    Create a consultation with questions, question parts, answers and themes from a yaml file.

    Creates relevant objects: Consultation, Respondent, Question, QuestionPart, Answer, and -
    for free-text question parts only - ExecutionRun, Framework, Theme, ThemeMapping and
    SentimentMapping.

    Args:
        file_path: YAML file holding a list of questions. Each question needs
            "question_text", "order" and "parts"; each part needs "text", "type" and "order",
            and may have "options", "themes" and "free_text_answers".
        number_respondents: number of respondents to create; every respondent answers
            every question part.
        consultation: consultation to attach the data to. A new one is created if not given.

    Returns:
        The consultation the generated data belongs to.

    Raises:
        ValueError: if a question part has a type other than single option, multiple
            options or free text.
    """
    if not consultation:
        consultation = Consultation2Factory()
    respondents = [RespondentFactory(consultation=consultation) for _ in range(number_respondents)]

    with open(file_path, "r", encoding="utf-8") as file:
        # An empty file loads as None - treat that as "no questions".
        questions_data = yaml.safe_load(file) or []

    question_types = models.QuestionPart.QuestionType
    task_types = models.ExecutionRun.TaskType

    # Save all questions, and corresponding question parts and answers
    for question_data in questions_data:
        question = Question2Factory(
            text=question_data["question_text"],
            order=question_data["order"],
            consultation=consultation,
        )

        # Each question part is considered separately
        for part in question_data["parts"]:
            question_part_type = part["type"]
            question_part = QuestionPartFactory(
                question=question,
                text=part["text"],
                type=question_part_type,
                options=part.get("options", []),
                order=part["order"],
            )
            is_free_text = question_part_type == question_types.FREE_TEXT

            # Reset per part so themes never leak from a previous question part.
            theme_objects = []
            if is_free_text:
                # Simulate execution runs for each question part to generate sentiment,
                # themes and theme mapping
                sentiment_run = ExecutionRunFactory(type=task_types.SENTIMENT_ANALYSIS)
                theme_generation_run = ExecutionRunFactory(type=task_types.THEME_GENERATION)
                framework = FrameworkFactory(
                    execution_run=theme_generation_run, question_part=question_part
                )
                theme_mapping_run = ExecutionRunFactory(type=task_types.THEME_MAPPING)
                # Keep every theme from the file, not just the last one, so that answers
                # can be mapped to any of them.
                theme_objects = [
                    Theme2Factory(
                        framework=framework,
                        theme_name=theme["name"],
                        theme_description=theme["description"],
                    )
                    for theme in part.get("themes", [])
                ]

            # Now populate the answers and corresponding themes etc. for this question part
            for respondent in respondents:
                if question_part_type == question_types.SINGLE_OPTION:
                    # NOTE(review): this stores one bare option rather than a one-element
                    # list - confirm that is what Answer2.chosen_options expects.
                    chosen_options = random.choice(part["options"])
                    text = ""
                elif question_part_type == question_types.MULTIPLE_OPTIONS:
                    chosen_options = random.sample(
                        part["options"], k=random.randint(1, len(part["options"]))
                    )
                    text = ""
                elif is_free_text:
                    text = random.choice(part.get("free_text_answers", [""]))
                    chosen_options = []
                else:
                    # Fail clearly instead of reusing values left over from a previous answer.
                    raise ValueError(f"Unsupported question part type: {question_part_type!r}")

                answer = Answer2Factory(
                    question_part=question_part,
                    text=text,
                    chosen_options=chosen_options,
                    respondent=respondent,
                )

                # Now map (multiple) themes and sentiment to each answer for free-text
                # questions. This is a different order from how the pipeline would do it,
                # because here we are reading everything from a file.
                if is_free_text:
                    # A part without themes simply gets no theme mappings.
                    if theme_objects:
                        themes_for_answer = random.sample(
                            theme_objects, k=random.randint(1, len(theme_objects))
                        )
                        for theme in themes_for_answer:
                            ThemeMappingFactory(
                                answer=answer, theme=theme, execution_run=theme_mapping_run
                            )
                    sentiment = random.choice(models.SentimentMapping.PositionType.values)
                    SentimentMappingFactory(
                        answer=answer, execution_run=sentiment_run, position=sentiment
                    )

    return consultation
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from consultation_analyser.consultations import models
from consultation_analyser.consultations.download_consultation import consultation_to_json
from consultation_analyser.factories2 import create_dummy_consultation_from_yaml
from consultation_analyser.consultations.dummy_data import create_dummy_consultation_from_yaml
from consultation_analyser.hosting_environment import HostingEnvironment

NO_SUMMARY_STR = "Unable to generate summary for this theme"
Expand Down
10 changes: 2 additions & 8 deletions tests/commands/test_dummy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,11 @@
def test_name_parameter_sets_consultation_name(mock_is_local):
call_command(
"generate_dummy_data",
name="My special consultation",
stdout=StringIO(), # we'll ignore this
)

assert models.Consultation.objects.count() == 1
assert models.Question.objects.count() == 10
assert models.Answer.objects.count() >= 100

assert models.Consultation.objects.first().name == "My special consultation"
assert models.Consultation.objects.first().slug == "my-special-consultation"
assert models.Consultation2.objects.count() == 1
assert models.Question2.objects.count() == 5


@pytest.mark.django_db
Expand All @@ -34,6 +29,5 @@ def test_the_tool_will_only_run_in_dev(environment):
):
call_command(
"generate_dummy_data",
name="My special consultation",
stdout=StringIO(), # we'll ignore this
)
Loading

0 comments on commit 8d05f63

Please sign in to comment.