Cherry pick changes (#1020)

holoviz · Jan 31, 2025 · 60ea02f
1 parent 6199843
commit 60ea02f
Show file tree

Hide file tree

Showing 5 changed files with 28 additions and 12 deletions.
diff --git a/lumen/ai/config.py b/lumen/ai/config.py
@@ -47,6 +47,7 @@ class RetriesExceededError(Exception):
     asyncio.CancelledError
 )
 
-SOURCE_TABLE_SEPARATOR = "<->"
+SOURCE_TABLE_SEPARATOR = ":::"
+PROVIDED_SOURCE_NAME = 'ProvidedSource00000'
 
 pn.chat.ChatStep.min_width = 375
diff --git a/lumen/ai/coordinator.py b/lumen/ai/coordinator.py
@@ -796,7 +796,7 @@ async def _compute_execution_graph(self, messages: list[Message], agents: dict[s
         tool_names = [tool.name for tool in self._tools["__main__"]]
         agent_names = [sagent.name[:-5] for sagent in agents.values()]
 
-        tables, tables_schema_str = await gather_table_sources(self._memory['sources'])
+        tables, tables_schema_str = await gather_table_sources(self._memory['sources'], include_provided=False)
 
         reason_model, plan_model = self._get_model(
             "main",

diff --git a/lumen/ai/prompts/Planner/main.jinja2 b/lumen/ai/prompts/Planner/main.jinja2
@@ -7,14 +7,15 @@ Ground Rules:
 - Each of these experts requires certain information and has the ability to provide certain information.
 - Do not perform tasks the user didn't ask for, e.g. do not plot the data unless requested or compute things if the user asked you to summarize the results in words.
 - Ensure that you provide each expert the context they need to ensure they do not repeat previous steps.
+- Do not go in depth to try to solve the problem, just make a plan and let the experts do the work.
 {%- if tools %}
 - Tools do not interact with a user, assign an expert to report, summarize or use the results.
 - When looking up information with a tool ensure the expert comes AFTER the tool.
 {%- endif %}
 
-Agent Rules:
-- The SQLAgent can generate and execute queries in a single step. DO NOT create two separate steps for generating the query and then executing it.
-- The SQLAgent usually is followed by the AnalystAgent, which can help the user understand the results of the query.
+Important Agent Rules:
+- The SQLAgent can generate and execute multiple queries in a single step. DO NOT create two separate steps for generating the query and then executing it.
+- The SQLAgent is a better candidate than TableListAgent if asked to show the table, and usually is followed by the AnalystAgent, which can help the user understand the results of the query.
 - The ChatAgent usually can be used alone, but if the query is related to the data tables, please use AnalystAgent instead.
 {% endblock -%}
 

diff --git a/lumen/ai/ui.py b/lumen/ai/ui.py
@@ -27,7 +27,7 @@
 from panel.viewable import Viewer
 from panel.widgets import Button, FileDownload, MultiChoice
 
-from lumen.ai.config import SOURCE_TABLE_SEPARATOR
+from lumen.ai.config import PROVIDED_SOURCE_NAME, SOURCE_TABLE_SEPARATOR
 
 from ..pipeline import Pipeline
 from ..sources import Source
@@ -258,7 +258,7 @@ def _resolve_data(self, data: DataT | list[DataT] | None):
             source = DuckDBSource(
                 tables=tables, mirrors=mirrors,
                 uri=':memory:', initializers=initializers,
-                name='ProvidedSource00000'
+                name=PROVIDED_SOURCE_NAME
             )
             sources.append(source)
         memory['sources'] = sources

diff --git a/lumen/ai/utils.py b/lumen/ai/utils.py
@@ -27,7 +27,10 @@
 from lumen.sources.duckdb import DuckDBSource
 
 from ..util import log
-from .config import PROMPTS_DIR, UNRECOVERABLE_ERRORS, RetriesExceededError
+from .config import (
+    PROMPTS_DIR, PROVIDED_SOURCE_NAME, SOURCE_TABLE_SEPARATOR,
+    UNRECOVERABLE_ERRORS, RetriesExceededError,
+)
 
 if TYPE_CHECKING:
     from panel.chat.step import ChatStep
@@ -342,22 +345,33 @@ def report_error(exc: Exception, step: ChatStep):
     step.status = "failed"
 
 
-async def gather_table_sources(sources: list[Source]) -> tuple[dict[str, Source], str]:
+async def gather_table_sources(sources: list[Source], include_provided: bool = True, include_sep: bool = False) -> tuple[dict[str, Source], str]:
     """
     Get a dictionary of tables to their respective sources
     and a markdown string of the tables and their schemas.
+
+    Parameters
+    ----------
+    sources : list[Source]
+        A list of sources to gather tables from.
+    include_provided : bool
+        Whether to list the tables of the provided source (the source named
+        PROVIDED_SOURCE_NAME) in the string; they are always included in
+        the dictionary.
+    include_sep : bool
+        Whether to label each table in the string as
+        ``<separator><source name><separator><table>`` using
+        SOURCE_TABLE_SEPARATOR, instead of the bare table name.
+
+    Returns
+    -------
+    tuple[dict[str, Source], str]
+        The table-to-source mapping and the markdown listing with
+        surrounding whitespace stripped.
     """
     tables_to_source = {}
     tables_schema_str = ""
     for source in sources:
         for table in source.get_tables():
             tables_to_source[table] = source
-            if isinstance(source, DuckDBSource) and source.ephemeral:
+            # The mapping above is always updated; only the markdown listing is filtered.
+            if source.name == PROVIDED_SOURCE_NAME and not include_provided:
+                continue
+            # Interpolate the source's name: formatting the Source object itself
+            # would embed its string representation in the label.
+            label = f"{SOURCE_TABLE_SEPARATOR}{source.name}{SOURCE_TABLE_SEPARATOR}{table}" if include_sep else table
+            # Explicit grouping of and/or; the provided source is matched by the
+            # shared constant, consistent with the skip check above.
+            if (isinstance(source, DuckDBSource) and source.ephemeral) or source.name == PROVIDED_SOURCE_NAME:
                 sql = source.get_sql_expr(table)
                 schema = await get_schema(source, table, include_min_max=False, include_enum=True, limit=3)
-                tables_schema_str += f"- {table}\nSchema:\n```yaml\n{yaml.dump(schema)}```\nSQL:\n```sql\n{sql}\n```\n\n"
+                tables_schema_str += f"- {label}\nSchema:\n```yaml\n{yaml.dump(schema)}```\nSQL:\n```sql\n{sql}\n```\n\n"
             else:
-                tables_schema_str += f"- {table}\n\n"
+                tables_schema_str += f"- {label}\n\n"
     return tables_to_source, tables_schema_str.strip()