Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug]: view is not working with NLSQLTableQueryEngine even after modify the initialization of the SQLDatabase to include view_support=True #14514

Open
akashkumar398 opened this issue Jul 2, 2024 · 1 comment
Labels
bug Something isn't working triage Issue needs to be triaged/prioritized

Comments

@akashkumar398
Copy link

Bug Description

view is not working with NLSQLTableQueryEngine even after modifying the initialization of the SQL database to include view_support=True

Version

llama-index==0.10.50

Steps to Reproduce

from llama_index.core import (
SQLDatabase,
VectorStoreIndex,
Settings,
)
from llama_index.llms.openai import OpenAI
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler

Set OpenAI API key

openai.api_key = os.environ["OPENAI_API_KEY"]

engine = initialize_database_connection()

tables = ["customer_information_enquiry", "view_table"]
sql_database = SQLDatabase(engine, include_tables=tables, sample_rows_in_table_info=1, view_support=True)

MODEL = 'gpt-3.5-turbo-0125'

token_counter = TokenCountingHandler(
tokenizer=tiktoken.encoding_for_model(MODEL).encode
)
callback_manager = CallbackManager([token_counter])

llm = OpenAI(temperature=0, model=MODEL)
Settings.llm = llm
Settings.callback_manager = callback_manager

query_engine = NLSQLTableQueryEngine(
sql_database=sql_database,

TypeError Traceback (most recent call last)
Cell In[12], line 103
99 engine = initialize_database_connection()
101 tables = ["customer_information_enquiry","vw_jobs"]
--> 103 sql_database = SQLDatabase(engine, include_tables=tables, sample_rows_in_table_info=1, view_support=True)
104 # sql_database = SQLDatabase(engine, include_tables=tables, sample_rows_in_table_info=1)
106 MODEL = 'gpt-3.5-turbo-0125'

File ~\anaconda3\envs\dbgpt\lib\site-packages\llama_index\core\utilities\sql_wrapper.py:110, in SQLDatabase.init(self, engine, schema, metadata, ignore_tables, include_tables, sample_rows_in_table_info, indexes_in_table_info, custom_table_info, view_support, max_string_length)
108 self._metadata = metadata or MetaData()
109 # including view support if view_support = true
--> 110 self._metadata.reflect(
111 views=view_support,
112 bind=self._engine,
113 only=list(self._usable_tables),
114 schema=self._schema,
115 )

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\sql\schema.py:5782, in MetaData.reflect(self, bind, schema, views, only, extend_existing, autoload_replace, resolve_fks, **dialect_kwargs)
5775 load = [
5776 name
5777 for name in only
5778 if extend_existing or name not in current
5779 ]
5780 # pass the available tables so the inspector can
5781 # choose to ignore the filter_names
-> 5782 _reflect_info = insp._get_reflection_info(
5783 schema=schema,
5784 filter_names=load,
5785 available=available,
5786 kind=kind,
5787 scope=util.preloaded.engine_reflection.ObjectScope.ANY,
5788 **dialect_kwargs,
5789 )
5790 reflect_opts["_reflect_info"] = _reflect_info
5792 for name in load:

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:2008, in Inspector._get_reflection_info(self, schema, filter_names, available, _reflect_info, **kw)
2004 res = {}
2005 return res
2007 info = _ReflectionInfo(
-> 2008 columns=run(
2009 self.get_multi_columns, check_filter_names_from_meth=True
2010 ),
2011 pk_constraint=run(self.get_multi_pk_constraint),
2012 foreign_keys=run(self.get_multi_foreign_keys),
2013 indexes=run(self.get_multi_indexes),
2014 unique_constraints=run(
2015 self.get_multi_unique_constraints, optional=True
2016 ),
2017 table_comment=run(self.get_multi_table_comment, optional=True),
2018 check_constraints=run(
2019 self.get_multi_check_constraints, optional=True
2020 ),
2021 table_options=run(self.get_multi_table_options, optional=True),
2022 unreflectable=unreflectable,
2023 )
2024 if _reflect_info:
2025 _reflect_info.update(info)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:1994, in Inspector._get_reflection_info..run(meth, optional, check_filter_names_from_meth)
1992 try:
1993 if has_result:
-> 1994 res = meth(filter_names=_fn, **kw)
1995 if check_filter_names_from_meth and not res:
1996 # method returned no result data.
1997 # skip any future call methods
1998 has_result = False

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:921, in Inspector.get_multi_columns(self, schema, filter_names, kind, scope, **kw)
884 r"""Return information about columns in all objects in the given
885 schema.
886
(...)
917 .. seealso:: :meth:Inspector.get_columns
918 """
920 with self._operation_context() as conn:
--> 921 table_col_defs = dict(
922 self.dialect.get_multi_columns(
923 conn,
924 schema=schema,
925 filter_names=filter_names,
926 kind=kind,
927 scope=scope,
928 info_cache=self.info_cache,
929 **kw,
930 )
931 )
932 self._instantiate_types(table_col_defs.values())
933 return table_col_defs

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\default.py:1098, in DefaultDialect._default_multi_reflect(self, single_tbl_method, connection, kind, schema, filter_names, scope, **kw)
1094 key = (schema, table)
1095 try:
1096 yield (
1097 key,
-> 1098 single_tbl_method(
1099 connection, table, schema=schema, **kw
1100 ),
1101 )
1102 except exc.UnreflectableTableError as err:
1103 if key not in unreflectable:

File :2, in get_columns(self, connection, table_name, schema, **kw)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:97, in cache(fn, self, con, *args, **kw)
95 ret: _R = info_cache.get(key)
96 if ret is None:
---> 97 ret = fn(self, con, *args, **kw)
98 info_cache[key] = ret
99 return ret

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\base.py:2917, in MySQLDialect.get_columns(self, connection, table_name, schema, **kw)
2915 @reflection.cache
2916 def get_columns(self, connection, table_name, schema=None, **kw):
-> 2917 parsed_state = self._parsed_state_or_create(
2918 connection, table_name, schema, **kw
2919 )
2920 if parsed_state.columns:
2921 return parsed_state.columns

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\base.py:3177, in MySQLDialect._parsed_state_or_create(self, connection, table_name, schema, **kw)
3174 def _parsed_state_or_create(
3175 self, connection, table_name, schema=None, **kw
3176 ):
-> 3177 return self._setup_parser(
3178 connection,
3179 table_name,
3180 schema,
3181 info_cache=kw.get("info_cache", None),
3182 )

File :2, in _setup_parser(self, connection, table_name, schema, **kw)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:97, in cache(fn, self, con, *args, **kw)
95 ret: _R = info_cache.get(key)
96 if ret is None:
---> 97 ret = fn(self, con, *args, **kw)
98 info_cache[key] = ret
99 return ret

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\base.py:3212, in MySQLDialect._setup_parser(self, connection, table_name, schema, **kw)
3207 if parser._check_view(sql):
3208 # Adapt views to something table-like.
3209 columns = self._describe_table(
3210 connection, None, charset, full_name=full_name
3211 )
-> 3212 sql = parser._describe_to_create(table_name, columns)
3213 return parser.parse(sql, charset)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\reflection.py:350, in MySQLTableDefinitionParser._describe_to_create(self, table_name, columns)
348 line.append("NOT NULL")
349 if default:
--> 350 if "auto_increment" in default:
351 pass
352 elif col_type.startswith("timestamp") and default.startswith(
353 "C"
354 ):

TypeError: a bytes-like object is required, not 'str'

it's giving an error

Relevant Logs/Tracbacks

No response

@akashkumar398 akashkumar398 added bug Something isn't working triage Issue needs to be triaged/prioritized labels Jul 2, 2024
Copy link

dosubot bot commented Jul 2, 2024

The issue you're encountering with the NLSQLTableQueryEngine not working with views, even after setting view_support=True in the SQLDatabase initialization, is due to the fact that the NLSQLTableQueryEngine class does not have any implementation or parameter that directly handles or checks for view_support. The view_support parameter is correctly handled by the SQLDatabase class during initialization and schema reflection, but this support does not automatically extend to the NLSQLTableQueryEngine.

To support views in the NLSQLTableQueryEngine, you need to ensure that the NLSQLRetriever and other relevant components are properly configured to handle views. Here are the steps you can follow:

  1. Include Views in Table List: Ensure that the views are included in the list of tables that the NLSQLRetriever can query. This can be done by modifying the tables parameter when initializing the NLSQLRetriever.

  2. Get Table Context: The _get_table_context method should be able to retrieve the schema information for views as well as tables. This method constructs the context string that includes the schema information for the tables/views.

Here is an example of how you might adjust the initialization to include views:

from sqlalchemy import Table, MetaData

# Assuming `sql_database` is an instance of SQLDatabase
metadata = MetaData(bind=sql_database.engine)
metadata.reflect(views=True)  # Reflect both tables and views

# Get all table and view names
all_tables_and_views = [Table(t, metadata) for t in metadata.tables.keys()]

# Initialize NLSQLRetriever with the list of tables and views
nlsql_retriever = NLSQLRetriever(
    sql_database=sql_database,
    tables=all_tables_and_views,
    # other parameters...
)

By including views in the tables parameter, the NLSQLRetriever will be able to query them just like regular tables. Additionally, ensure that the _get_table_context method in the NLSQLRetriever class is capable of retrieving and constructing the schema information for both tables and views.

Moreover, the NLSQLTableQueryEngine class, which uses NLSQLRetriever, should be configured properly to handle the views. The NLSQLTableQueryEngine class initializes the NLSQLRetriever with the necessary parameters, including the list of tables and views. Ensure that the tables parameter passed to NLSQLTableQueryEngine includes the views as well.

class NLSQLTableQueryEngine(BaseSQLTableQueryEngine):
    def __init__(
        self,
        sql_database: SQLDatabase,
        text_to_sql_prompt: Optional[BasePromptTemplate] = None,
        context_query_kwargs: Optional[dict] = None,
        synthesize_response: bool = True,
        response_synthesis_prompt: Optional[BasePromptTemplate] = None,
        tables: Optional[Union[List[str], List[Table]]] = None,
        service_context: Optional[ServiceContext] = None,
        context_str_prefix: Optional[str] = None,
        sql_only: bool = False,
        verbose: bool = False,
        **kwargs: Any,
    ) -> None:
        self._sql_retriever = NLSQLRetriever(
            sql_database,
            text_to_sql_prompt=text_to_sql_prompt,
            context_query_kwargs=context_query_kwargs,
            tables=tables,
            context_str_prefix=context_str_prefix,
            service_context=service_context,
            sql_only=sql_only,
            verbose=verbose,
        )
        super().__init__(
            synthesize_response=synthesize_response,
            response_synthesis_prompt=response_synthesis_prompt,
            service_context=service_context,
            verbose=verbose,
            **kwargs,
        )

    @property
    def sql_retriever(self) -> NLSQLRetriever:
        return self._sql_retriever

By ensuring that the views are included in the tables parameter and that the _get_table_context method can handle views, the NLSQLRetriever and NLSQLTableQueryEngine will be able to support SQL views effectively [1][2][3][4].

To continue talking to Dosu, mention @dosu.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working triage Issue needs to be triaged/prioritized
Projects
None yet
Development

No branches or pull requests

1 participant