Skip to content

Commit

Permalink
Cherry pick changes (#1020)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ahuang11 authored Jan 31, 2025
1 parent 6199843 commit 60ea02f
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 12 deletions.
3 changes: 2 additions & 1 deletion lumen/ai/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class RetriesExceededError(Exception):
asyncio.CancelledError
)

SOURCE_TABLE_SEPARATOR = "<->"
SOURCE_TABLE_SEPARATOR = ":::"
PROVIDED_SOURCE_NAME = 'ProvidedSource00000'

pn.chat.ChatStep.min_width = 375
2 changes: 1 addition & 1 deletion lumen/ai/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ async def _compute_execution_graph(self, messages: list[Message], agents: dict[s
tool_names = [tool.name for tool in self._tools["__main__"]]
agent_names = [sagent.name[:-5] for sagent in agents.values()]

tables, tables_schema_str = await gather_table_sources(self._memory['sources'])
tables, tables_schema_str = await gather_table_sources(self._memory['sources'], include_provided=False)

reason_model, plan_model = self._get_model(
"main",
Expand Down
7 changes: 4 additions & 3 deletions lumen/ai/prompts/Planner/main.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ Ground Rules:
- Each of these experts requires certain information and has the ability to provide certain information.
- Do not perform tasks the user didn't ask for, e.g. do not plot the data unless requested or compute things if the user asked you to summarize the results in words.
- Ensure that you provide each expert the context they need to ensure they do not repeat previous steps.
- Do not go in depth to try to solve the problem, just make a plan and let the experts do the work.
{%- if tools %}
- Tools do not interact with a user, assign an expert to report, summarize or use the results.
- When looking up information with a tool ensure the expert comes AFTER the tool.
{%- endif %}

Agent Rules:
- The SQLAgent can generate and execute queries in a single step. DO NOT create two separate steps for generating the query and then executing it.
- The SQLAgent usually is followed by the AnalystAgent, which can help the user understand the results of the query.
Important Agent Rules:
- The SQLAgent can generate and execute multiple queries in a single step. DO NOT create two separate steps for generating the query and then executing it.
- The SQLAgent is a better candidate than TableListAgent if asked to show the table, and usually is followed by the AnalystAgent, which can help the user understand the results of the query.
- The ChatAgent usually can be used alone, but if the query is related to the data tables, please use AnalystAgent instead.
{% endblock -%}

Expand Down
4 changes: 2 additions & 2 deletions lumen/ai/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from panel.viewable import Viewer
from panel.widgets import Button, FileDownload, MultiChoice

from lumen.ai.config import SOURCE_TABLE_SEPARATOR
from lumen.ai.config import PROVIDED_SOURCE_NAME, SOURCE_TABLE_SEPARATOR

from ..pipeline import Pipeline
from ..sources import Source
Expand Down Expand Up @@ -258,7 +258,7 @@ def _resolve_data(self, data: DataT | list[DataT] | None):
source = DuckDBSource(
tables=tables, mirrors=mirrors,
uri=':memory:', initializers=initializers,
name='ProvidedSource00000'
name=PROVIDED_SOURCE_NAME
)
sources.append(source)
memory['sources'] = sources
Expand Down
24 changes: 19 additions & 5 deletions lumen/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
from lumen.sources.duckdb import DuckDBSource

from ..util import log
from .config import PROMPTS_DIR, UNRECOVERABLE_ERRORS, RetriesExceededError
from .config import (
PROMPTS_DIR, PROVIDED_SOURCE_NAME, SOURCE_TABLE_SEPARATOR,
UNRECOVERABLE_ERRORS, RetriesExceededError,
)

if TYPE_CHECKING:
from panel.chat.step import ChatStep
Expand Down Expand Up @@ -342,22 +345,33 @@ def report_error(exc: Exception, step: ChatStep):
step.status = "failed"


async def gather_table_sources(sources: list[Source]) -> tuple[dict[str, Source], str]:
async def gather_table_sources(sources: list[Source], include_provided: bool = True, include_sep: bool = False) -> tuple[dict[str, Source], str]:
"""
Get a dictionary of tables to their respective sources
and a markdown string of the tables and their schemas.
Parameters
----------
sources : list[Source]
A list of sources to gather tables from.
include_provided : bool
Whether to list the provided source's tables in the schema string; its tables are always included in the returned dictionary either way.
include_sep : bool
Whether to label each table in the string as separator + source + separator + table name instead of the bare table name.
"""
tables_to_source = {}
tables_schema_str = ""
for source in sources:
for table in source.get_tables():
tables_to_source[table] = source
if isinstance(source, DuckDBSource) and source.ephemeral:
if source.name == PROVIDED_SOURCE_NAME and not include_provided:
continue
label = f"{SOURCE_TABLE_SEPARATOR}{source}{SOURCE_TABLE_SEPARATOR}{table}" if include_sep else table
if isinstance(source, DuckDBSource) and source.ephemeral or "Provided" in source.name:
sql = source.get_sql_expr(table)
schema = await get_schema(source, table, include_min_max=False, include_enum=True, limit=3)
tables_schema_str += f"- {table}\nSchema:\n```yaml\n{yaml.dump(schema)}```\nSQL:\n```sql\n{sql}\n```\n\n"
tables_schema_str += f"- {label}\nSchema:\n```yaml\n{yaml.dump(schema)}```\nSQL:\n```sql\n{sql}\n```\n\n"
else:
tables_schema_str += f"- {table}\n\n"
tables_schema_str += f"- {label}\n\n"
return tables_to_source, tables_schema_str.strip()


Expand Down

0 comments on commit 60ea02f

Please sign in to comment.