-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
change generating dummy data to use new models
- Loading branch information
Showing 8 changed files with 156 additions and 236 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,123 @@ | ||
import datetime | ||
import random | ||
from typing import Optional | ||
|
||
from consultation_analyser.factories import ( | ||
ConsultationBuilder, | ||
FakeConsultationData, | ||
import yaml | ||
|
||
from consultation_analyser.consultations import models | ||
from consultation_analyser.factories2 import ( | ||
Answer2Factory, | ||
Consultation2Factory, | ||
ExecutionRunFactory, | ||
FrameworkFactory, | ||
Question2Factory, | ||
QuestionPartFactory, | ||
RespondentFactory, | ||
SentimentMappingFactory, | ||
Theme2Factory, | ||
ThemeMappingFactory, | ||
) | ||
from consultation_analyser.hosting_environment import HostingEnvironment | ||
|
||
|
||
def create_dummy_data(responses=20, number_questions=10, **options): | ||
if number_questions > 10: | ||
raise RuntimeError("You can't have more than 10 questions") | ||
def create_dummy_consultation_from_yaml( | ||
file_path: str = "./tests/examples/sample_questions.yml", | ||
number_respondents: int = 10, | ||
consultation: Optional[models.Consultation2] = None, | ||
) -> Consultation2Factory: | ||
""" | ||
Create consultation with question, question parts, answers and themes from yaml file. | ||
Creates relevant objects: Consultation, Question, QuestionPart, Answer, Theme, ThemeMapping, | ||
SentimentMapping, Framework, ExecutionRun. | ||
""" | ||
if HostingEnvironment.is_production(): | ||
raise RuntimeError("Dummy data generation should not be run in production") | ||
|
||
# Timestamp to avoid duplicates - set these as default options | ||
timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") | ||
if "name" not in options: | ||
options["name"] = f"Dummy consultation generated at {timestamp}" | ||
if "slug" not in options: | ||
options["slug"] = f"consultation-slug-{timestamp}" | ||
|
||
consultation_builder = ConsultationBuilder(**options) | ||
fake_consultation_data = FakeConsultationData() | ||
all_questions = fake_consultation_data.all_questions() | ||
questions_to_include = all_questions[:number_questions] | ||
|
||
questions = [ | ||
consultation_builder.add_question( | ||
text=q["text"], | ||
slug=q["slug"], | ||
multiple_choice_questions=[ | ||
(x["question_text"], x["options"]) for x in (q.get("multiple_choice") or []) | ||
], | ||
has_free_text=q["has_free_text"], | ||
if not consultation: | ||
consultation = Consultation2Factory() | ||
respondents = [RespondentFactory(consultation=consultation) for _ in range(number_respondents)] | ||
|
||
with open(file_path, "r") as file: | ||
questions_data = yaml.safe_load(file) | ||
|
||
# Save all questions, and corresponding question parts and answers | ||
for question_data in questions_data: | ||
question = Question2Factory( | ||
text=question_data["question_text"], | ||
order=question_data["order"], | ||
consultation=consultation, | ||
) | ||
for q in questions_to_include | ||
] | ||
|
||
for i, r in enumerate(range(responses)): | ||
for q in questions: | ||
if q.has_free_text: | ||
if random.randrange(1, 4) == 1: | ||
free_text_answer = "" | ||
else: | ||
free_text_answer = fake_consultation_data.get_free_text_answer(q.slug) | ||
else: | ||
free_text_answer = None | ||
|
||
consultation_builder.add_answer(q, free_text=free_text_answer) | ||
consultation_builder.next_response() | ||
|
||
# always assign a double multichoice selection to the last question | ||
question_options = q.multiple_choice_options[0] | ||
answers = (question_options["question_text"], question_options["options"][:2]) | ||
consultation_builder.add_answer(q, multiple_choice_answers=[answers]) | ||
|
||
return consultation_builder.consultation | ||
parts = question_data["parts"] | ||
|
||
# Each question part is considered separately | ||
for part in parts: | ||
question_part_type = part["type"] | ||
question_part = QuestionPartFactory( | ||
question=question, | ||
text=part["text"], | ||
type=question_part_type, | ||
options=part.get("options", []), | ||
order=part["order"], | ||
) | ||
|
||
# Get themes if free_text | ||
if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT: | ||
# Simulate execution runs for each question to generate sentiment, themes, theme mapping | ||
sentiment_run = ExecutionRunFactory( | ||
type=models.ExecutionRun.TaskType.SENTIMENT_ANALYSIS | ||
) | ||
theme_generation_run = ExecutionRunFactory( | ||
type=models.ExecutionRun.TaskType.THEME_GENERATION | ||
) | ||
framework = FrameworkFactory( | ||
execution_run=theme_generation_run, question_part=question_part | ||
) | ||
theme_mapping_run = ExecutionRunFactory( | ||
type=models.ExecutionRun.TaskType.THEME_MAPPING | ||
) | ||
themes = part.get("themes", []) | ||
for theme in themes: | ||
theme_objects = [ | ||
Theme2Factory( | ||
framework=framework, | ||
theme_name=theme["name"], | ||
theme_description=theme["description"], | ||
) | ||
] | ||
|
||
# Now populate the answers and corresponding themes etc. for these question parts | ||
for respondent in respondents: | ||
if question_part_type == models.QuestionPart.QuestionType.SINGLE_OPTION: | ||
chosen_options = random.choice(part["options"]) | ||
text = "" | ||
elif question_part_type == models.QuestionPart.QuestionType.MULTIPLE_OPTIONS: | ||
chosen_options = random.sample( | ||
part["options"], k=random.randint(1, len(part["options"])) | ||
) | ||
text = "" | ||
elif question_part_type == models.QuestionPart.QuestionType.FREE_TEXT: | ||
text = random.choice(part.get("free_text_answers", [""])) | ||
chosen_options = [] | ||
|
||
answer = Answer2Factory( | ||
question_part=question_part, | ||
text=text, | ||
chosen_options=chosen_options, | ||
respondent=respondent, | ||
) | ||
# Now map (multiple) themes and sentiment to each answer for free-text questions. | ||
# This is in a different order to how it would work in pipeline - but this is as we | ||
# are reading from file. | ||
if question_part_type == models.QuestionPart.QuestionType.FREE_TEXT: | ||
themes_for_answer = random.sample( | ||
theme_objects, k=random.randint(1, len(theme_objects)) | ||
) | ||
for theme in themes_for_answer: | ||
ThemeMappingFactory( | ||
answer=answer, theme=theme, execution_run=theme_mapping_run | ||
) | ||
sentiment = random.choice(models.SentimentMapping.PositionType.values) | ||
SentimentMappingFactory( | ||
answer=answer, execution_run=sentiment_run, position=sentiment | ||
) | ||
|
||
return consultation |
2 changes: 1 addition & 1 deletion in consultation_analyser/consultations/management/commands/generate_dummy_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.