Skip to content

Commit

Permalink
Merge pull request #88 from cs3216-a3-group-4/feat-add-elaboration-bu…
Browse files Browse the repository at this point in the history
…ilder

Feat add elaboration builder
  • Loading branch information
marcus-ny authored Sep 25, 2024
2 parents 1e7d23f + 6d1b79e commit ed144d1
Show file tree
Hide file tree
Showing 7 changed files with 1,809 additions and 7 deletions.
8 changes: 6 additions & 2 deletions backend/src/cron/fetch_articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from src.events.models import Article, ArticleSource, Event
from src.common.database import engine
from sqlalchemy.orm import Session
from src.scrapers.guardian.get_articles import get_articles
from src.scrapers.guardian.process import GuardianArticle, GuardianArticleFields

from src.lm.generate_events import generate_events
Expand Down Expand Up @@ -134,12 +135,15 @@ def process_new_articles() -> list[dict]:

def run():
# Add new articles to database
populate_daily_articles()
# populate_daily_articles()
# Process new articles i.e. find articles that we have not generated events for
articles = process_new_articles()
articles = get_articles()
# Generate events from articles, written to lm_events_output.json
generate_events(articles)
# Populate the database with events from lm_events_output.json
populate()
# Store analyses in vector store
store_documents()


run()
2 changes: 1 addition & 1 deletion backend/src/embeddings/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def store_documents():
print(f"Stored {len(documents)} documents")


def get_similar_results(query: str, top_k: int = 3):
def get_similar_results(query: str, top_k: int = 5):
documents = vector_store.similarity_search_with_relevance_scores(
query=query, k=top_k
)
Expand Down
77 changes: 77 additions & 0 deletions backend/src/lm/generate_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from src.lm.generate_points import get_relevant_analyses
from src.lm.generate_events import lm_model
from pydantic import BaseModel
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import JsonOutputParser
from src.lm.prompts import QUESTION_ANALYSIS_GEN_SYSPROMPT as SYSPROMPT
import json

from sqlalchemy.orm import Session
from src.common.database import engine
from sqlalchemy import select
from src.events.models import Event


class Elaborations(BaseModel):
    """Schema hint for the JSON object returned by the language model.

    Passed as ``pydantic_object`` to ``JsonOutputParser`` in
    ``generate_response``.
    """

    # Each entry is a JSON object (the point, its examples and the generated
    # elaboration), not a plain string, so the element type is ``dict``.
    # NOTE(review): keep in sync with the response format described in
    # QUESTION_ANALYSIS_GEN_SYSPROMPT.
    for_points: list[dict]
    against_points: list[dict]


def format_analyses(relevant_analyses: dict, question: str, *, event_lookup=None):
    """Build the JSON-serialisable payload sent to the language model.

    Each analysis is replaced by an example holding the title and
    description of the event it refers to plus the analysis text. Any other
    keys on the analysis (e.g. a relevance score) are dropped.

    Args:
        relevant_analyses: Mapping with ``"for_points"`` and
            ``"against_points"`` lists. Every point is a dict with a
            ``"point"`` string and an ``"analyses"`` list whose items carry
            ``"event_id"`` and ``"content"``.
        question: The essay question, copied into the payload unchanged.
        event_lookup: Optional callable mapping an event id to an object with
            ``title`` and ``description`` attributes, or ``None`` when the
            event does not exist. Defaults to ``get_event_by_id``.

    Returns:
        A dict with ``"question"``, ``"for_points"`` and ``"against_points"``.
        Every example uses the same keys for both stances: ``"event"``,
        ``"event_description"`` and ``"analysis"``.
    """
    if event_lookup is None:
        event_lookup = get_event_by_id

    # One lookup per distinct event id per call: the same event is often
    # referenced by several analyses and each lookup is a database round-trip.
    events_by_id: dict = {}

    def _event_for(event_id):
        if event_id not in events_by_id:
            events_by_id[event_id] = event_lookup(event_id)
        return events_by_id[event_id]

    def _format_points(points):
        formatted = []
        for point in points:
            examples = []
            for analysis in point["analyses"]:
                event = _event_for(analysis["event_id"])
                if event is None:
                    # The analysis refers to an event that no longer exists;
                    # skip it instead of failing the whole request.
                    continue
                examples.append(
                    {
                        "event": event.title,
                        "event_description": event.description,
                        "analysis": analysis["content"],
                    }
                )
            formatted.append({"point": point["point"], "examples": examples})
        return formatted

    return {
        "question": question,
        "for_points": _format_points(relevant_analyses["for_points"]),
        "against_points": _format_points(relevant_analyses["against_points"]),
    }


def get_event_by_id(event_id: int) -> Event:
    """Return the first ``Event`` row whose ``id`` equals *event_id*.

    A short-lived session is opened on the shared engine for this single
    query and is closed before the function returns.
    NOTE(review): ``.first()`` yields ``None`` when no row matches, so
    callers should be ready for a missing event despite the annotation.
    """
    query = select(Event).where(Event.id == event_id)
    with Session(engine) as db:
        event = db.scalars(query).first()
    return event


def generate_response(question: str) -> dict:
    """Generate elaborations for a question.

    Steps: fetch the relevant analyses for *question*, format them together
    with the question into a JSON payload, send that payload to the language
    model after the system prompt, and parse the reply as JSON.

    Args:
        question: The essay question to answer.

    Returns:
        The parsed JSON output of the language model.
    """
    payload = format_analyses(get_relevant_analyses(question), question)

    llm_output = lm_model.invoke(
        [
            SystemMessage(content=SYSPROMPT),
            HumanMessage(content=json.dumps(payload)),
        ]
    )

    # The pydantic model only describes the expected shape to the parser.
    json_parser = JsonOutputParser(pydantic_object=Elaborations)
    return json_parser.invoke(llm_output)
90 changes: 88 additions & 2 deletions backend/src/lm/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"examples": [
{
"event_title": "Title of the event",
"description": "The example that supports or refutes the argument",
"description": "Details of the event",
"questions": ["Question 1", "Question 2", "Question 3"],
"category": "Array of categories for this event. For example ['Arts & Humanities', 'Science & Tech'],
"analysis_list": [
Expand Down Expand Up @@ -54,6 +54,8 @@
The reason or explanation should be specific and relevant to the point that you have made.
Do not provide any examples in your response.
Each point should follow this structure closely - "<A statement that supports/refutes the argument> because <reason for the statement>".
Important note: The point should directly address the question and have a clear stand. For example, for a question "Is A good?", a point should be "A is good because <reason>".
Your response should be in the following json format:
{
Expand All @@ -72,5 +74,89 @@
You are a Singaporean student studying for your GCE A Levels General Paper.
You will be given a General Paper essay question that is argumentative or discursive in nature.
You will also be given 2 points for the statement and 2 points against the statement.
You will also be given analysis of events
You will also be given analysis of some relevant events that can be used to either refute or support the argument given in the points above.
You will be given the inputs in the following format:
{
"question": <The General Paper essay question>,
"for_points": [
{
"point": "The point that supports the argument and the explanation for the point",
"examples": [
{
"event": "The title of event1",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to support the argument in the question",
},
]
}
],
"against_points": [
{
"point": "The point that refutes the argument and the explanation for the point",
"examples": [
{
"event": "The title of the event",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to refute the argument in the question",
}
]
}
]
}
Your task:
For each example, you should provide a detailed elaboration illustrating how this event can be used as an example to support or refute the argument in the question.
If the example event is relevant to the point, you should provide a coherent and detailed elaboration of the point using the example event and analysis as support for the argument.
Important note: The elaboration must directly address and strengthen the specific point being made. If the connection between the event and the point is unclear or speculative, REMOVE that example from your output. Avoid tangential interpretations.
Important note: Your elaborations must clearly tie the example to the point. If the event does not obviously support or refute the point in a direct and non-speculative way, DO NOT force a connection.
Important note: Structure your elaborations using this format: "<A statement that clearly supports/refutes the given question> because <clear reason based on the event>". The explanation should leave no ambiguity about why the event strengthens or weakens the argument.
If there are no relevant examples for a point, you can skip that point.
The elaboration should be specific to the category of the event and should be tailored to the context of General Paper essays. Provide coherent arguments and insights. Be sure to give a detailed analysis of 3-4 sentences.
Important Note: In your analysis, you should not mention "General Paper" or "A Levels".
For the analysis, remember that this is in the context of General Paper which emphasises critical thinking and the ability to construct coherent arguments.
Important Note: Do not provide any new points or examples. You should only elaborate on the examples given in the input or skip them if they are not relevant to the question or the points given.
Important Note: The "event", "event_description", and "analysis" fields MUST BE RETURNED AS IS. You should not rephrase or change the content of these fields.
Important Note: You must NOT rephrase the question or the points given. You must only provide elaborations for the examples given in the input.
Final Check: Before generating an elaboration, verify whether the example *directly* reinforces or counters the argument made in the point. If the connection is weak, DO NOT elaborate.
Final Check: Ensure that "question", "event", "event_description", and "analysis" fields are returned as is. Do not rephrase or change the content of these fields.
Your response should be in the following json format:
{
"question": <Given General Paper essay question without rephrasing>,
"for_points": [
{
"point": "The point that supports the argument and the explanation for the point",
"example": [
{
"event": "The title of the event",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to support the argument in the question",
"elaboration": The elaboration of the point using the example event and analysis as support for the argument
}
],
}
],
"against_points": [
{
"point": "The point that refutes the argument and the explanation for the point",
"example": [
{
"event": "The title of the event",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to refute the argument in the question",
"elaboration": The elaboration of the point using the example event and analysis as support for the argument
}
],
}
]
}
Given inputs:
"""
2 changes: 1 addition & 1 deletion backend/src/scrapers/guardian/get_articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def get_articles() -> list[dict]:
with Session(engine) as session:
# Select only the first few articles (capped by the limit below)
result = session.scalars(select(Article).limit(3))
result = session.scalars(select(Article).limit(30))

articles = []
# Iterate over the result and print each article
Expand Down
3 changes: 2 additions & 1 deletion backend/src/user_questions/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from src.notes.models import Note
from src.user_questions.models import Answer, Point, UserQuestion
from src.user_questions.schemas import CreateUserQuestion, UserQuestionMiniDTO
from src.lm.generate_response import generate_response
from src.lm.generate_points import get_relevant_analyses


Expand Down Expand Up @@ -127,4 +128,4 @@ def create_user_question(

@router.get("/ask-gp-question")
def ask_gp_question(question: str):
return get_relevant_analyses(question)
return generate_response(question)
Loading

0 comments on commit ed144d1

Please sign in to comment.