Add memory feature to log analyzer
jerrykal committed Jan 26, 2024
1 parent dc3cf86 commit 04f6633
Showing 1 changed file with 38 additions and 11 deletions.
49 changes: 38 additions & 11 deletions ai2/log_analyzer.py
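The retrieval code added in this diff calls self.embedding_model.get_embeddings(...)[0].values and self.db.query(...), which line up with a Vertex AI-style text-embedding model and a Pinecone index. As a rough sketch of how those two attributes could be wired up from the pinecone_api_key and index_name arguments visible in __init__ (the libraries, model name, and environment value are assumptions, not taken from this commit):

```python
# Hypothetical wiring for the two attributes the new code relies on; only the
# attribute names (embedding_model, db) and the __init__ parameters
# (pinecone_api_key, index_name) come from the diff, the rest is assumed.
import pinecone
from vertexai.language_models import TextEmbeddingModel


def build_memory_backend(pinecone_api_key: str, index_name: str):
    pinecone.init(api_key=pinecone_api_key, environment="us-west1-gcp")  # environment is a guess
    db = pinecone.Index(index_name)  # later queried via db.query(...)
    embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko")  # assumed model
    return embedding_model, db
```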
@@ -53,6 +53,8 @@ def __init__(self, pinecone_api_key: str, index_name: str, llm_args: dict) -> None:
This data is presented in CSV format and encompasses the most recent {time_span} minutes:
{log_data}
{memory}
Some heuristic analysis has been performed beforehand; the following text is a summary of the analysis:
{heuristic_analysis}
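The placeholders above ({time_span}, {log_data}, {memory}, {heuristic_analysis}, plus the {format_instruction} used further down) match a LangChain-style PromptTemplate, which would also explain the format_prompt(...) and .text calls in the next hunk. The template object itself is not shown in this diff, so the following declaration is only an assumed shape with a trimmed stand-in template:

```python
from langchain.prompts import PromptTemplate

# Assumed shape of the template this class formats; the real template string is
# only partially visible in the diff above, so this is a trimmed stand-in.
TEMPLATE = """\
This data is presented in CSV format and encompasses the most recent {time_span} minutes:
{log_data}
{memory}
Some heuristic analysis has been performed beforehand; the following text is a summary of the analysis:
{heuristic_analysis}
{format_instruction}
"""

prompt_template = PromptTemplate(
    input_variables=[
        "time_span", "log_data", "memory", "heuristic_analysis", "format_instruction"
    ],
    template=TEMPLATE,
)
```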
@@ -98,24 +100,48 @@ def analyze_log(self, log_df: pd.DataFrame) -> dict:
# Perform heuristic analysis to aid the model
heuristic_feedback = self._heuristic_analysis(log_df)

# Query a similar log from Pinecone, which acts as a memory for the model
log_embedding = self.embedding_model.get_embeddings(
    [log_df.to_csv(index=False)]
)[0].values
matches = self.db.query(vector=log_embedding, top_k=1, include_metadata=True)[
    "matches"
]

# Generate memory if a log with high similarity score is found in past analysis
memory = ""
if len(matches) > 0 and matches[0]["score"] > 0.8:
    similar_log = matches[0]
    memory = f"""\
We encountered a similar log in the past, with the following log data in CSV format:
{similar_log['metadata']['log_csv']}
The following text delimited by triple backticks is the chat history between a human and an AI, which is a summary of the analysis and scaling action taken in the past:
```
analysis: {similar_log['metadata']['chat_memory']}
```
"""

# Define input variables for the prompt template
input_variables = {
    "log_data": log_df.to_csv(index=False),
    "memory": memory,
    "time_span": len(log_df),
    "heuristic_analysis": heuristic_feedback,
    "format_instruction": self.format_instruction,
}

# Invoke the model
- prompt = self.prompt_template.format_prompt(
-     log_data=log_df.to_csv(index=False),
-     time_span=len(log_df),
-     heuristic_analysis=heuristic_feedback,
-     format_instruction=self.format_instruction,
- )
+ prompt = self.prompt_template.format_prompt(**input_variables)
feedback = self.llm.invoke(prompt)

# Parse the output
feedback_dict = self.output_parser.parse(feedback)

# Add the prompt to the feedback. Note that the prompt here does not include the format instruction, to avoid unexpected responses when chatting.
input_variables["format_instruction"] = ""
feedback_dict["prompt"] = self.prompt_template.format_prompt(
-     log_data=log_df.to_csv(index=False),
-     time_span=len(log_df),
-     heuristic_analysis=heuristic_feedback,
-     format_instruction="",
+     **input_variables
).text

feedback_dict["metric_dataframe"] = log_df
@@ -159,7 +185,8 @@ def chat(self, id: str, prompt: str) -> str:
response = self.llm.invoke(
    f"""\
Prompt after formatting:
- The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
+ The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. \
+ Please assume that the human is more experienced than the AI, and the AI should trust the human's judgement.
Current conversation:
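For orientation, a rough sketch of how the analyzer might be driven after this change. Only the method names (analyze_log, chat) and the returned keys (prompt, metric_dataframe) come from the diff; the class instance, metric columns, and conversation id are placeholders:

```python
import pandas as pd

# Placeholder metrics: one row per minute, so time_span == len(log_df).
log_df = pd.DataFrame(
    {"cpu_usage": [0.42, 0.97, 0.95], "memory_usage": [0.31, 0.88, 0.91]}
)

# `analyzer` stands in for an instance of the class in ai2/log_analyzer.py;
# its construction is not shown in this diff.
feedback = analyzer.analyze_log(log_df)
print(feedback["prompt"])                  # stored without the format instruction
print(feedback["metric_dataframe"].shape)  # the raw metrics are carried along

# Follow-up questions about the analysis go through chat(); the id presumably
# selects which stored conversation/prompt to continue.
reply = analyzer.chat(id="conversation-1", prompt="Should we scale back down later?")
```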
