Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat add elaboration builder #88

Merged
merged 4 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions backend/src/cron/fetch_articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from src.events.models import Article, ArticleSource, Event
from src.common.database import engine
from sqlalchemy.orm import Session
from src.scrapers.guardian.get_articles import get_articles
from src.scrapers.guardian.process import GuardianArticle, GuardianArticleFields

from src.lm.generate_events import generate_events
Expand Down Expand Up @@ -134,12 +135,15 @@ def process_new_articles() -> list[dict]:

# Pipeline entry point: runs the article -> event -> vector-store steps in the
# order listed below.
# NOTE(review): this listing comes from a rendered diff view, so the removed
# and the added version of a changed line appear next to each other
# (populate_daily_articles() / its commented-out form, and the two
# `articles = ...` assignments). Presumably only the second line of each pair
# exists in the merged file - confirm against the repository.
def run():
# Add new articles to database
populate_daily_articles()
# populate_daily_articles()
# Process new articles i.e. find articles that we have not generated events for
articles = process_new_articles()
# As written, this assignment replaces the list produced on the previous line.
articles = get_articles()
# Generate events from articles, written to lm_events_output.json
generate_events(articles)
# Populate the database with events from lm_events_output.json
populate()
# Store analyses in vector store
store_documents()


# Executed at module level: the whole pipeline runs whenever this module is
# loaded, not only when it is started as a script.
run()
2 changes: 1 addition & 1 deletion backend/src/embeddings/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def store_documents():
print(f"Stored {len(documents)} documents")


def get_similar_results(query: str, top_k: int = 3):
def get_similar_results(query: str, top_k: int = 5):
documents = vector_store.similarity_search_with_relevance_scores(
query=query, k=top_k
)
Expand Down
77 changes: 77 additions & 0 deletions backend/src/lm/generate_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from src.lm.generate_points import get_relevant_analyses
from src.lm.generate_events import lm_model
from pydantic import BaseModel
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import JsonOutputParser
from src.lm.prompts import QUESTION_ANALYSIS_GEN_SYSPROMPT as SYSPROMPT
import json

from sqlalchemy.orm import Session
from src.common.database import engine
from sqlalchemy import select
from src.events.models import Event


# Pydantic model handed to JsonOutputParser (pydantic_object=...) in
# generate_response to describe the expected LM output.
# NOTE(review): the response format shown in the elaboration prompt nests
# objects (point / example / elaboration) under both keys rather than plain
# strings - confirm whether list[str] is the intended shape.
class Elaborations(BaseModel):
# Entries for the points arguing in favour of the question's statement.
for_points: list[str]
# Entries for the points arguing against the question's statement.
against_points: list[str]


def _format_examples(analyses: list[dict]) -> list[dict]:
    """Turn raw analyses into the example entries sent to the language model.

    Each analysis must provide "event_id" and "content". The event is looked
    up once per analysis; analyses whose event can no longer be found are
    skipped instead of failing on a missing row.
    """
    examples = []
    for analysis in analyses:
        event = get_event_by_id(analysis["event_id"])
        if event is None:
            # Nothing to show the model for this analysis, so leave it out.
            continue
        examples.append(
            {
                "event": event.title,
                "event_description": event.description,
                "analysis": analysis["content"],
            }
        )
    return examples


def _format_points(points: list[dict]) -> list[dict]:
    """Keep each point's text and replace its analyses with example entries."""
    return [
        {
            "point": point["point"],
            "examples": _format_examples(point["analyses"]),
        }
        for point in points
    ]


def format_analyses(relevant_analyses: dict, question: str) -> dict:
    """Build the JSON-serialisable payload passed to the elaboration prompt.

    Args:
        relevant_analyses: Mapping with "for_points" and "against_points",
            each a list of dicts holding a "point" string and an "analyses"
            list whose items carry "event_id" and "content".
        question: The essay question, copied into the payload unchanged.

    Returns:
        A dict with "question", "for_points" and "against_points". Every
        point keeps its "point" text and gains an "examples" list whose items
        have exactly the keys "event", "event_description" and "analysis";
        any other field of an analysis (for instance a score) is dropped.

    Both sides use the key "event" for the event title, which is the name the
    prompt's documented input format uses; previously the supporting side
    emitted "event_title" while the opposing side emitted "event".
    """
    return {
        "question": question,
        "for_points": _format_points(relevant_analyses["for_points"]),
        "against_points": _format_points(relevant_analyses["against_points"]),
    }


def get_event_by_id(event_id: int) -> Event:
    """Load a single event by primary-key value using a short-lived session.

    Returns the first row whose ``Event.id`` equals ``event_id``; the result
    of ``.first()`` is handed back unchanged, so it is ``None`` when no row
    matches.
    """
    query = select(Event).where(Event.id == event_id)
    with Session(engine) as db:
        return db.scalars(query).first()


def generate_response(question: str) -> dict:
    """Produce LM-generated elaborations for an essay question.

    Retrieves the analyses relevant to ``question``, formats them into the
    prompt payload, sends the system prompt plus the JSON-encoded payload to
    the language model, and returns the model output parsed as JSON.
    """
    payload = format_analyses(get_relevant_analyses(question), question)
    conversation = [
        SystemMessage(content=SYSPROMPT),
        HumanMessage(content=json.dumps(payload)),
    ]

    reply = lm_model.invoke(conversation)
    # The parser turns the raw model message into a plain JSON structure.
    return JsonOutputParser(pydantic_object=Elaborations).invoke(reply)
90 changes: 88 additions & 2 deletions backend/src/lm/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"examples": [
{
"event_title": "Title of the event",
"description": "The example that supports or refutes the argument",
"description": "Details of the event",
"questions": ["Question 1", "Question 2", "Question 3"],
"category": "Array of categories for this event. For example ['Arts & Humanities', 'Science & Tech'],
"analysis_list": [
Expand Down Expand Up @@ -54,6 +54,8 @@
The reason or explanation should be specific and relevant to the point that you have made.
Do not provide any examples in your response.

Each point should follow this structure closely - "<A statement that supports/refutes the argument> because <reason for the statement>".
Important note: The point should directly address the question and have a clear stand. For example, for a question "Is A good?", a point should be "A is good because <reason>".
Your response should be in the following json format:

{
Expand All @@ -72,5 +74,89 @@
You are a Singaporean student studying for your GCE A Levels General Paper.
You will be given a General Paper essay question that is argumentative or discursive in nature.
You will also be given 2 points for the statement and 2 points against the statement.
You will also be given analysis of events
You will also be given analysis of some relevant events that can be used to either refute or support the argument given in the points above.

You will be given the inputs in the following format:
{
"question": <The General Paper essay question>,
"for_points": [
{
"point": "The point that supports the argument and the explanation for the point",
"examples": [
{
"event": "The title of event1",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to support the argument in the question",
},
]

}
],
"against_points": [
{
"point": "The point that refutes the argument and the explanation for the point",
"examples": [
{
"event": "The title of the event",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to refute the argument in the question",
}
]
}
]
}

Your task:
For each example, you should provide a detailed elaboration illustrating how this event can be used as an example to support or refute the argument in the question.
If the example event is relevant to the point, you should provide a coherent and detailed elaboration of the point using the example event and analysis as support for the argument.

Important note: The elaboration must directly address and strengthen the specific point being made. If the connection between the event and the point is unclear or speculative, REMOVE that example from your output. Avoid tangential interpretations.
Important note: Your elaborations must clearly tie the example to the point. If the event does not obviously support or refute the point in a direct and non-speculative way, DO NOT force a connection.
Important note: Structure your elaborations using this format: "<A statement that clearly supports/refutes the given question> because <clear reason based on the event>". The explanation should leave no ambiguity about why the event strengthens or weakens the argument.

If there are no relevant examples for a point, you can skip that point.
The elaboration should be specific to the category of the event and should be tailored to the context of General Paper essays. Provide coherent arguments and insights. Be sure to give a detailed analysis of 3-4 sentences.
Important Note: In your analysis, you should not mention "General Paper" or "A Levels".
For the analysis, remember that this is in the context of General Paper which emphasises critical thinking and the ability to construct coherent arguments.

Important Note: Do not provide any new points or examples. You should only elaborate on the examples given in the input or skip them if they are not relevant to the question or the points given.
Important Note: The "event", "event_description", and "analysis" fields MUST BE RETURNED AS IS. You should not rephrase or change the content of these fields.
Important Note: You must NOT rephrase the question or the points given. You must only provide elaborations for the examples given in the input.

Final Check: Before generating an elaboration, verify whether the example *directly* reinforces or counters the argument made in the point. If the connection is weak, DO NOT elaborate.
Final Check: Ensure that "question", "event", "event_description", and "analysis" fields are returned as is. Do not rephrase or change the content of these fields.
Your response should be in the following json format:
{
"question": <Given General Paper essay question without rephrasing>,
"for_points": [
{
"point": "The point that supports the argument and the explanation for the point",
"example": [
{
"event": "The title of the event",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to support the argument in the question",
"elaboration": The elaboration of the point using the example event and analysis as support for the argument
}
],
}
],
"against_points": [
{
"point": "The point that refutes the argument and the explanation for the point",
"example": [
{
"event": "The title of the event",
"event_description": "The description of the event",
"analysis": "The analysis of how the event can be used as an example to refute the argument in the question",
}
],
"elaboration": The elaboration of the point using the example event and analysis as support for the argument
}
]
}



Given inputs:
"""
2 changes: 1 addition & 1 deletion backend/src/scrapers/guardian/get_articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def get_articles() -> list[dict]:
with Session(engine) as session:
# Select up to the first 30 articles
result = session.scalars(select(Article).limit(3))
result = session.scalars(select(Article).limit(30))

articles = []
# Iterate over the result and print each article
Expand Down
3 changes: 2 additions & 1 deletion backend/src/user_questions/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from src.notes.models import Note
from src.user_questions.models import Answer, Point, UserQuestion
from src.user_questions.schemas import CreateUserQuestion, UserQuestionMiniDTO
from src.lm.generate_response import generate_response
from src.lm.generate_points import get_relevant_analyses


Expand Down Expand Up @@ -127,4 +128,4 @@ def create_user_question(

# GET /ask-gp-question: takes the essay question as the `question` parameter
# and returns the generated result for it.
# NOTE(review): the two return lines below are presumably the removed and the
# added line of the same diff hunk; as literally written the first return
# wins and the generate_response call is never reached - confirm that the
# merged file keeps only `return generate_response(question)`.
@router.get("/ask-gp-question")
def ask_gp_question(question: str):
return get_relevant_analyses(question)
return generate_response(question)
Loading
Loading