jippy-the-frog · marcus-ny · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024
diff --git a/backend/src/cron/fetch_articles.py b/backend/src/cron/fetch_articles.py
@@ -6,6 +6,7 @@
 from src.events.models import Article, ArticleSource, Event
 from src.common.database import engine
 from sqlalchemy.orm import Session
+from src.scrapers.guardian.get_articles import get_articles
 from src.scrapers.guardian.process import GuardianArticle, GuardianArticleFields
 
 from src.lm.generate_events import generate_events
@@ -134,12 +135,18 @@ def process_new_articles() -> list[dict]:
 
 def run():
     # Add new articles to database
-    populate_daily_articles()
+    # populate_daily_articles()
     # Process new articles i.e. find articles that we have not generated events for
-    articles = process_new_articles()
+    articles = get_articles()
     # Generate events from articles, written to lm_events_output.json
     generate_events(articles)
     # Populate the database with events from lm_events_output.json
     populate()
     # Store analyses in vector store
     store_documents()
+
+
+# Run the pipeline only when this file is executed as a script, so that
+# importing the module (e.g. to call run() elsewhere) does not trigger it.
+if __name__ == "__main__":
+    run()
diff --git a/backend/src/embeddings/vector_store.py b/backend/src/embeddings/vector_store.py
@@ -80,7 +80,7 @@ def store_documents():
     print(f"Stored {len(documents)} documents")
 
 
-def get_similar_results(query: str, top_k: int = 3):
+def get_similar_results(query: str, top_k: int = 5):
     documents = vector_store.similarity_search_with_relevance_scores(
         query=query, k=top_k
     )

diff --git a/backend/src/lm/generate_response.py b/backend/src/lm/generate_response.py
@@ -0,0 +1,95 @@
+from src.lm.generate_points import get_relevant_analyses
+from src.lm.generate_events import lm_model
+from pydantic import BaseModel
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.output_parsers import JsonOutputParser
+from src.lm.prompts import QUESTION_ANALYSIS_GEN_SYSPROMPT as SYSPROMPT
+import json
+
+from sqlalchemy.orm import Session
+from src.common.database import engine
+from sqlalchemy import select
+from src.events.models import Event
+
+
+class Elaborations(BaseModel):
+    # Model handed to JsonOutputParser in generate_response.
+    # NOTE(review): the prompt appears to request objects (point, example,
+    # elaboration) under these keys rather than plain strings -- confirm.
+    for_points: list[str]
+    against_points: list[str]
+
+
+def format_analyses(relevant_analyses: dict, question: str) -> dict:
+    """Build the payload that generate_response serialises for the LM.
+
+    Args:
+        relevant_analyses: dict with "for_points" and "against_points" lists.
+            Each point has a "point" entry and an "analyses" list whose items
+            carry "event_id" and "content"; any other keys are dropped.
+        question: the essay question, copied through under "question".
+
+    Returns:
+        A dict with "question", "for_points" and "against_points". Every point
+        keeps its "point" and gets an "examples" list of dicts with "event",
+        "event_description" and "analysis".
+    """
+    return {
+        "question": question,
+        "for_points": _format_points(relevant_analyses["for_points"]),
+        "against_points": _format_points(relevant_analyses["against_points"]),
+    }
+
+
+def _format_points(points: list[dict]) -> list[dict]:
+    """Convert one side's points, attaching the examples for each point."""
+    return [
+        {"point": point["point"], "examples": _format_examples(point["analyses"])}
+        for point in points
+    ]
+
+
+def _format_examples(analyses: list[dict]) -> list[dict]:
+    """Turn analyses into example dicts, loading each event exactly once."""
+    examples = []
+    for analysis in analyses:
+        # One lookup per analysis; title and description share the same row.
+        event = get_event_by_id(analysis["event_id"])
+        if event is None:
+            # No matching event row: skip the example instead of failing on None.
+            continue
+        examples.append(
+            {
+                # Both sides use "event", the key the prompt's input format names.
+                "event": event.title,
+                "event_description": event.description,
+                "analysis": analysis["content"],
+            }
+        )
+    return examples
+
+
+def get_event_by_id(event_id: int) -> "Event | None":
+    """Fetch the Event with the given id, or None when no row matches."""
+    with Session(engine) as session:
+        return session.scalars(select(Event).where(Event.id == event_id)).first()
+
+
+def generate_response(question: str) -> dict:
+    """Ask the LM to elaborate on the analyses retrieved for ``question``.
+
+    Retrieves the relevant analyses, formats them together with the question,
+    sends them to the LM after the system prompt, and returns the reply parsed
+    as JSON.
+    """
+    relevant_analyses = get_relevant_analyses(question)
+    formatted_analyses = format_analyses(relevant_analyses, question)
+    messages = [
+        SystemMessage(content=SYSPROMPT),
+        HumanMessage(content=json.dumps(formatted_analyses)),
+    ]
+
+    result = lm_model.invoke(messages)
+    parser = JsonOutputParser(pydantic_object=Elaborations)
+    elaborations = parser.invoke(result)
+    return elaborations
diff --git a/backend/src/lm/prompts.py b/backend/src/lm/prompts.py
@@ -23,7 +23,7 @@
     "examples": [
         { 
         "event_title": "Title of the event",
-        "description": "The example that supports or refutes the argument",
+        "description": "Details of the event",
         "questions": ["Question 1", "Question 2", "Question 3"],
         "category": "Array of categories for this event. For example ['Arts & Humanities', 'Science & Tech'], 
         "analysis_list": [
@@ -54,6 +54,8 @@
     The reason or explanation should be specific and relevant to the point that you have made.
     Do not provide any examples in your response.
 
+    Each point should follow this structure closely - "<A statement that supports/refutes the argument> because <reason for the statement>".
+    Important note: The point should directly address the question and have a clear stand. For example, for a question "Is A good?", a point should be "A is good because <reason>".
     Your response should be in the following json format:
 
         {
@@ -72,5 +74,89 @@
     You are a Singaporean student studying for your GCE A Levels General Paper.
     You will be given a General Paper essay question that is argumentative or discursive in nature.
     You will also be given 2 points for the statement and 2 points against the statement.
-    You will also be given analysis of events 
+    You will also be given analysis of some relevant events that can be used to either refute or support the argument given in the points above.
+
+    You will be given the inputs in the following format:
+    {
+        "question": <The General Paper essay question>,
+        "for_points": [
+            {
+                "point": "The point that supports the argument and the explanation for the point",
+                "examples": [
+                    {
+                        "event": "The title of event1",
+                        "event_description": "The description of the event",
+                        "analysis": "The analysis of how the event can be used as an example to support the argument in the question",
+                    },
+                ]
+
+            }
+        ],
+        "against_points": [
+            {
+                "point": "The point that refutes the argument and the explanation for the point",
+                "examples": [
+                    {
+                        "event": "The title of the event",
+                        "event_description": "The description of the event",
+                        "analysis": "The analysis of how the event can be used as an example to refute the argument in the question",
+                    }
+                ]    
+            }
+        ]
+    }
+
+    Your task:
+    For each example, you should provide a detailed elaboration illustrating how this event can be used as an example to support or refute the argument in the question.
+    If the example event is relevant to the point, you should provide a coherent and detailed elaboration of the point using the example event and analysis as support for the argument.
+
+    Important note: The elaboration must directly address and strengthen the specific point being made. If the connection between the event and the point is unclear or speculative, REMOVE that example from your output. Avoid tangential interpretations.
+    Important note: Your elaborations must clearly tie the example to the point. If the event does not obviously support or refute the point in a direct and non-speculative way, DO NOT force a connection.
+    Important note: Structure your elaborations using this format: "<A statement that clearly supports/refutes the given question> because <clear reason based on the event>". The explanation should leave no ambiguity about why the event strengthens or weakens the argument.
+
+    If there are no relevant examples for a point, you can skip that point.
+    The elaboration should be specific to the category of the event and should be tailored to the context of General Paper essays. Provide coherent arguments and insights. Be sure to give a detailed analysis of 3-4 sentences.
+    Important Note: In your analysis, you should not mention "General Paper" or "A Levels".
+    For the analysis, remember that this is in the context of General Paper which emphasises critical thinking and the ability to construct coherent arguments.
+
+    Important Note: Do not provide any new points or examples. You should only elaborate on the examples given in the input or skip them if they are not relevant to the question or the points given.
+    Important Note: The "event", "event_description", and "analysis" fields MUST BE RETURNED AS IS. You should not rephrase or change the content of these fields.
+    Important Note: You must NOT rephrase the question or the points given. You must only provide elaborations for the examples given in the input.
+
+    Final Check: Before generating an elaboration, verify whether the example *directly* reinforces or counters the argument made in the point. If the connection is weak, DO NOT elaborate.
+    Final Check: Ensure that "question", "event", "event_description", and "analysis" fields are returned as is. Do not rephrase or change the content of these fields.
+    Your response should be in the following json format:
+    {
+        "question": <Given General Paper essay question without rephrasing>,
+        "for_points": [
+            {
+                "point": "The point that supports the argument and the explanation for the point",
+                "example": [
+                    {
+                        "event": "The title of the event",
+                        "event_description": "The description of the event",
+                        "analysis": "The analysis of how the event can be used as an example to support the argument in the question",
+                        "elaboration": The elaboration of the point using the example event and analysis as support for the argument
+                    }
+                ],
+            }
+        ],
+        "against_points": [
+            {
+                "point": "The point that refutes the argument and the explanation for the point",
+                "example": [
+                    {
+                        "event": "The title of the event",
+                        "event_description": "The description of the event",
+                        "analysis": "The analysis of how the event can be used as an example to refute the argument in the question",
+                        "elaboration": The elaboration of the point using the example event and analysis as support for the argument
+                    }
+                ],
+            }
+        ]
+    }
+
+
+
+    Given inputs:
 """
diff --git a/backend/src/scrapers/guardian/get_articles.py b/backend/src/scrapers/guardian/get_articles.py
@@ -7,7 +7,7 @@
 def get_articles() -> list[dict]:
     with Session(engine) as session:
         # Select the first 5 articles
-        result = session.scalars(select(Article).limit(3))
+        result = session.scalars(select(Article).limit(30))
 
         articles = []
         # Iterate over the result and print each article

diff --git a/backend/src/user_questions/router.py b/backend/src/user_questions/router.py
@@ -11,6 +11,7 @@
 from src.notes.models import Note
 from src.user_questions.models import Answer, Point, UserQuestion
 from src.user_questions.schemas import CreateUserQuestion, UserQuestionMiniDTO
+from src.lm.generate_response import generate_response
 from src.lm.generate_points import get_relevant_analyses
 
 
@@ -127,4 +128,4 @@ def create_user_question(
 
 @router.get("/ask-gp-question")
 def ask_gp_question(question: str):
-    return get_relevant_analyses(question)
+    return generate_response(question)  # LM elaborations, not just the retrieved analyses