Skip to content

feat: Add confirmation question flow and multilingual support to research agent #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 93 additions & 16 deletions backend/src/agent/graph.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

from agent.tools_and_schemas import SearchQueryList, Reflection
from agent.tools_and_schemas import SearchQueryList, Reflection, ConfirmationQuestion
from dotenv import load_dotenv
from langchain_core.messages import AIMessage
from langgraph.types import Send
Expand All @@ -22,6 +22,7 @@
web_searcher_instructions,
reflection_instructions,
answer_instructions,
confirmation_question_instructions,
)
from langchain_google_genai import ChatGoogleGenerativeAI
from agent.utils import (
Expand All @@ -40,7 +41,72 @@
genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))


# Utility functions for confirmation flow
def should_skip_confirmation(messages: list) -> bool:
    """Return True when the most recent message asks to bypass confirmation.

    Scans the last message's text (case-insensitively) for any known
    skip phrase; an empty history never skips.
    """
    if not messages:
        return False

    text = messages[-1].content.lower()
    skip_markers = (
        "answer immediately",
        "no questions",
        "skip questions",
        "without questions",
        "no confirmation",
    )
    for marker in skip_markers:
        if marker in text:
            return True
    return False


def check_initial_state(state: "OverallState") -> str:
    """Route the graph entry point.

    Returns "generate_query" when confirmation has already happened,
    was explicitly skipped, is requested to be skipped in the latest
    message, or when a reply to a prior confirmation question exists
    (two or more messages). Otherwise returns "confirmation_question".
    """
    messages = state.get("messages", [])

    # Explicit flags set on the state short-circuit everything else.
    if state.get("confirmation_completed", False) or state.get("skip_confirmation", False):
        return "generate_query"

    # The user asked to skip in their message, or has already answered
    # the confirmation question (>= 2 messages in the history).
    if should_skip_confirmation(messages) or len(messages) >= 2:
        return "generate_query"

    return "confirmation_question"


# Nodes
def confirmation_question(state: "OverallState", config: "RunnableConfig") -> dict:
    """Ask the user a clarifying question before research begins.

    Uses the configured query-generator model with structured output to
    produce a ConfirmationQuestion, then returns it as an AI message
    together with flags marking confirmation as still pending.
    """
    configurable = Configuration.from_runnable_config(config)

    # Same model as query generation (e.g. Gemini 2.0 Flash), with a
    # moderate temperature for natural-sounding questions.
    model = ChatGoogleGenerativeAI(
        model=configurable.query_generator_model,
        temperature=0.7,
        max_retries=2,
        api_key=os.getenv("GEMINI_API_KEY"),
    )

    prompt = confirmation_question_instructions.format(
        research_topic=get_research_topic(state["messages"])
    )
    result = model.with_structured_output(ConfirmationQuestion).invoke(prompt)

    # Combine the question with the LLM-generated skip instruction.
    content = result.question + "\n\n(" + result.skip_instruction + ")"

    return {
        "messages": [AIMessage(content=content)],
        "confirmation_completed": False,
        "skip_confirmation": False,
    }


def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
"""LangGraph node that generates a search queries based on the User's question.

Expand All @@ -56,6 +122,9 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
"""
configurable = Configuration.from_runnable_config(config)

# Set confirmation completed flag
state["confirmation_completed"] = True

# check for custom initial search query count
if state.get("initial_search_query_count") is None:
state["initial_search_query_count"] = configurable.number_of_initial_queries
Expand Down Expand Up @@ -151,9 +220,9 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
Dictionary with state update, including search_query key containing the generated follow-up query
"""
configurable = Configuration.from_runnable_config(config)
# Increment the research loop count and get the reasoning model
# Increment the research loop count and get the reflection model
state["research_loop_count"] = state.get("research_loop_count", 0) + 1
reasoning_model = state.get("reasoning_model") or configurable.reasoning_model
reflection_model = state.get("reflection_model") or configurable.reflection_model

# Format the prompt
current_date = get_current_date()
Expand All @@ -162,9 +231,9 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
research_topic=get_research_topic(state["messages"]),
summaries="\n\n---\n\n".join(state["web_research_result"]),
)
# init Reasoning Model
# init Reflection Model
llm = ChatGoogleGenerativeAI(
model=reasoning_model,
model=reflection_model,
temperature=1.0,
max_retries=2,
api_key=os.getenv("GEMINI_API_KEY"),
Expand Down Expand Up @@ -231,7 +300,7 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
Dictionary with state update, including running_summary key containing the formatted final summary with sources
"""
configurable = Configuration.from_runnable_config(config)
reasoning_model = state.get("reasoning_model") or configurable.reasoning_model
answer_model = state.get("answer_model") or configurable.answer_model

# Format the prompt
current_date = get_current_date()
Expand All @@ -241,9 +310,9 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
summaries="\n---\n\n".join(state["web_research_result"]),
)

# init Reasoning Model, default to Gemini 2.5 Flash
# init Answer Model, default to Gemini 2.5 Pro
llm = ChatGoogleGenerativeAI(
model=reasoning_model,
model=answer_model,
temperature=0,
max_retries=2,
api_key=os.getenv("GEMINI_API_KEY"),
Expand All @@ -268,26 +337,34 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
# Create our Agent Graph
builder = StateGraph(OverallState, config_schema=Configuration)

# Define the nodes we will cycle between
# Define the nodes
builder.add_node("confirmation_question", confirmation_question)
builder.add_node("generate_query", generate_query)
builder.add_node("web_research", web_research)
builder.add_node("reflection", reflection)
builder.add_node("finalize_answer", finalize_answer)

# Set the entrypoint as `generate_query`
# This means that this node is the first one called
builder.add_edge(START, "generate_query")
# Add conditional edge to continue with search queries in a parallel branch
# Conditional routing at entry point
builder.add_conditional_edges(
START,
check_initial_state,
{
"confirmation_question": "confirmation_question",
"generate_query": "generate_query"
}
)

# End after confirmation question (wait for user response)
builder.add_edge("confirmation_question", END)

# Continue with existing flow after generate_query
builder.add_conditional_edges(
"generate_query", continue_to_web_research, ["web_research"]
)
# Reflect on the web research
builder.add_edge("web_research", "reflection")
# Evaluate the research
builder.add_conditional_edges(
"reflection", evaluate_research, ["web_research", "finalize_answer"]
)
# Finalize the answer
builder.add_edge("finalize_answer", END)

graph = builder.compile(name="pro-search-agent")
65 changes: 59 additions & 6 deletions backend/src/agent/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ def get_current_date():
return datetime.now().strftime("%B %d, %Y")


query_writer_instructions = """Your goal is to generate sophisticated and diverse web search queries. These queries are intended for an advanced automated web research tool capable of analyzing complex results, following links, and synthesizing information.
query_writer_instructions = """**IMPORTANT: Respond in the same language as the user's input.**

Your goal is to generate sophisticated and diverse web search queries. These queries are intended for an advanced automated web research tool capable of analyzing complex results, following links, and synthesizing information.

Instructions:
- Always prefer a single search query, only add another query if the original question requests multiple aspects or elements and one query is not enough.
Expand All @@ -16,7 +18,7 @@ def get_current_date():
- Don't generate multiple similar queries, 1 is enough.
- Query should ensure that the most current information is gathered. The current date is {current_date}.

Format:
Format:
- Format your response as a JSON object with ALL three of these exact keys:
- "rationale": Brief explanation of why these queries are relevant
- "query": A list of search queries
Expand All @@ -40,14 +42,16 @@ def get_current_date():
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Conduct multiple, diverse searches to gather comprehensive information.
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be a well-written summary or report based on your search findings.
- The output should be a well-written summary or report based on your search findings.
- Only include the information found in the search results, don't make up any information.

Research Topic:
{research_topic}
"""

reflection_instructions = """You are an expert research assistant analyzing summaries about "{research_topic}".
reflection_instructions = """**IMPORTANT: Respond in the same language as the user's input.**

You are an expert research assistant analyzing summaries about "{research_topic}".

Instructions:
- Identify knowledge gaps or areas that need deeper exploration and generate a follow-up query. (1 or multiple).
Expand Down Expand Up @@ -79,11 +83,13 @@ def get_current_date():
{summaries}
"""

answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries.
answer_instructions = """**IMPORTANT: Respond in the same language as the user's input.**

Generate a high-quality answer to the user's question based on the provided summaries.

Instructions:
- The current date is {current_date}.
- You are the final step of a multi-step research process, don't mention that you are the final step.
- You are the final step of a multi-step research process, don't mention that you are the final step.
- You have access to all the information gathered from the previous steps.
- You have access to the user's question.
- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
Expand All @@ -94,3 +100,50 @@ def get_current_date():

Summaries:
{summaries}"""

confirmation_question_instructions = """You are an advanced research-specialized AI assistant.

**IMPORTANT: Respond in the same language as the user's input.**

User's research request: {research_topic}

To provide more accurate and useful information, you need to analyze this request and confirm technical details and specific requirements.

Analysis perspectives:
1. **Technology Stack**: Specific versions and configurations of technologies, tools, and frameworks being used
2. **Environment and Constraints**: Execution environment (cloud/on-premise), resource constraints, integration with existing systems
3. **Specific Requirements**: Quantities (number of clusters, users, etc.), performance requirements, security requirements
4. **Implementation Details**: Usage of specific features, configuration details, customization needs
5. **Goals and Deliverables**: Final objectives, expected outcomes, target audience or users

Requirements for confirmation questions:
- Ask for specific information needed for implementation regarding technical elements in the request
- If there are multiple important confirmation items, organize them with bullet points into a single question
- When there are technical choices, provide specific examples to prompt selection
- Clarify ambiguous parts or parts that can be interpreted in multiple ways

Output format:
- "question": Confirmation question (specific question including technical details)
- "rationale": Why this question is important (from a technical perspective)
- "skip_instruction": Brief instruction explaining why you're asking this question and how the user can skip it (e.g., "This helps me provide more accurate information. You can say 'answer immediately' to skip.")

Example 1:
User request: "I want to create an e-commerce site with React and Next.js"
```json
{{
"question": "Could you tell me about the following points regarding the e-commerce site implementation?\\n\\n1. **Payment System**: Are you planning to use Stripe, PayPal, or domestic payment services?\\n2. **Product Management**: What's the scale of products (tens to tens of thousands) and do you plan to use a CMS (Contentful, Sanity, etc.)?\\n3. **Authentication Method**: Which authentication system are you considering - NextAuth.js, Auth0, Firebase Auth, etc.?\\n4. **Deployment Environment**: Are you planning to use Vercel, AWS, or another platform?\\n5. **Performance Requirements**: Do you have targets for concurrent users or page load times?",
"rationale": "E-commerce implementation requires many technical decisions including payment, inventory management, authentication, and performance. Understanding these details allows me to provide specific implementation methods and best practices.",
"skip_instruction": "This question helps me provide more accurate and tailored implementation guidance. You can say 'answer immediately without questions' to skip confirmation."
}}
```

Example 2:
User request: "Create multi-cluster with kind + k0smotron and create multiple hosted control planes with CAPI"
```json
{{
"question": "To create better documentation and blog posts for this advanced multi-cluster configuration, could you tell me about the following points?\\n\\n1. **Infrastructure Environment**: Are you planning a local (kind) verification environment or a production environment on cloud (AWS/GCP/Azure)?\\n2. **CAPI Provider**: Which infrastructure provider for Cluster API (Docker, vSphere, AWS, etc.) and what's the number and role distribution of hosted control planes?\\n3. **Network Configuration**: Any plans to use Service Mesh (Istio, etc.) and what's the inter-cluster network connection method (VPN, dedicated line, etc.)?\\n4. **Envoy Gateway Usage**: What's the specific use case - API Gateway, gRPC proxy, L7 load balancer, etc.?\\n5. **Projectsveltos Scope**: Are you planning specific namespace/service only or cross-cluster configuration management?\\n6. **Target Audience**: Technical details for Kubernetes experts or implementation procedures for intermediate users?",
"rationale": "This configuration is an advanced architecture combining multiple CNCF projects. Specific configurations and integration methods for each component vary greatly depending on environment and requirements, so understanding these details helps create practical documentation.",
"skip_instruction": "These questions help me create more accurate and practical documentation. You can say 'answer immediately' to skip this confirmation."
}}
```
"""
2 changes: 2 additions & 0 deletions backend/src/agent/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class OverallState(TypedDict):
max_research_loops: int
research_loop_count: int
reasoning_model: str
confirmation_completed: bool # Whether confirmation question has been completed
skip_confirmation: bool # Whether to skip confirmation


class ReflectionState(TypedDict):
Expand Down
7 changes: 7 additions & 0 deletions backend/src/agent/tools_and_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,10 @@ class Reflection(BaseModel):
follow_up_queries: List[str] = Field(
description="A list of follow-up queries to address the knowledge gap."
)


class ConfirmationQuestion(BaseModel):
    """Schema for structured output of confirmation questions.

    Produced by the confirmation node's structured LLM call; the
    question and skip_instruction are concatenated into the AI message
    shown to the user, while rationale records why the clarification
    was asked (not included in the user-facing message).
    """
    # Shown to the user as the clarifying question.
    question: str = Field(description="Confirmation question for the user")
    # Model's justification for asking; kept for transparency/debugging.
    rationale: str = Field(description="Reason why this question is important")
    # Appended after the question (in parentheses) so the user knows how to opt out.
    skip_instruction: str = Field(description="Brief instruction on how to skip confirmation, explaining why this question is being asked")