diff --git a/docs/docs/databases/installing-database-drivers.mdx b/docs/docs/databases/installing-database-drivers.mdx index b4be939c3b5a8..5599b35900aae 100644 --- a/docs/docs/databases/installing-database-drivers.mdx +++ b/docs/docs/databases/installing-database-drivers.mdx @@ -52,12 +52,12 @@ Some of the recommended packages are shown below. Please refer to [setup.py](htt | [Oracle](/docs/databases/oracle) | `pip install cx_Oracle` | `oracle://` | | [PostgreSQL](/docs/databases/postgres) | `pip install psycopg2` | `postgresql://:@/` | | [Presto](/docs/databases/presto) | `pip install pyhive` | `presto://` | -| [Rockset](/docs/databases/rockset) | `pip install rockset-sqlalchemy` | `rockset://:@` | +| [Rockset](/docs/databases/rockset) | `pip install rockset-sqlalchemy` | `rockset://:@` | [SAP Hana](/docs/databases/hana) | `pip install hdbcli sqlalchemy-hana or pip install apache-superset[hana]` | `hana://{username}:{password}@{host}:{port}` | | [StarRocks](/docs/databases/starrocks) | `pip install starrocks` | `starrocks://:@:/.` | | [Snowflake](/docs/databases/snowflake) | `pip install snowflake-sqlalchemy` | `snowflake://{user}:{password}@{account}.{region}/{database}?role={role}&warehouse={warehouse}` | -| SQLite | No additional library needed | `sqlite://path/to/file.db?check_same_thread=false` | -| [SQL Server](/docs/databases/sql-server) | `pip install pymssql` | `mssql+pymssql://` | +| SQLite | No additional library needed | `sqlite://` | +| [SQL Server](/docs/databases/sql-server) | `pip install pymssql` | `mssql+pymssql://` | | [Teradata](/docs/databases/teradata) | `pip install teradatasqlalchemy` | `teradatasql://{user}:{password}@{host}` | | [TimescaleDB](/docs/databases/timescaledb) | `pip install psycopg2` | `postgresql://:@:/` | | [Trino](/docs/databases/trino) | `pip install trino` | `trino://{username}:{password}@{hostname}:{port}/{catalog}` | diff --git a/docs/docs/frequently-asked-questions.mdx b/docs/docs/frequently-asked-questions.mdx index df4ee7a442510..a289222b40a8e 100644 --- a/docs/docs/frequently-asked-questions.mdx +++ b/docs/docs/frequently-asked-questions.mdx @@ -142,7 +142,7 @@ Another workaround is to change where superset stores the sqlite database by add `superset_config.py`: ``` -SQLALCHEMY_DATABASE_URI = 'sqlite:////new/location/superset.db?check_same_thread=false' +SQLALCHEMY_DATABASE_URI = 'sqlite:////new/location/superset.db' ``` You can read more about customizing Superset using the configuration file diff --git a/docs/docs/installation/configuring-superset.mdx b/docs/docs/installation/configuring-superset.mdx index 155f82e07a94e..01e6274bbb28f 100644 --- a/docs/docs/installation/configuring-superset.mdx +++ b/docs/docs/installation/configuring-superset.mdx @@ -45,9 +45,7 @@ SECRET_KEY = 'YOUR_OWN_RANDOM_GENERATED_SECRET_KEY' # superset metadata (slices, connections, tables, dashboards, ...). # Note that the connection information to connect to the datasources # you want to explore are managed directly in the web UI -# The check_same_thread=false property ensures the sqlite client does not attempt -# to enforce single-threaded access, which may be problematic in some edge cases -SQLALCHEMY_DATABASE_URI = 'sqlite:////path/to/superset.db?check_same_thread=false' +SQLALCHEMY_DATABASE_URI = 'sqlite:////path/to/superset.db' # Flask-WTF flag for CSRF WTF_CSRF_ENABLED = True diff --git a/superset/config.py b/superset/config.py index 936c8d6d4b6fb..9035269d43123 100644 --- a/superset/config.py +++ b/superset/config.py @@ -184,10 +184,7 @@ def _try_json_readsha(filepath: str, length: int) -> str | None: SECRET_KEY = os.environ.get("SUPERSET_SECRET_KEY") or CHANGE_ME_SECRET_KEY # The SQLAlchemy connection string. -SQLALCHEMY_DATABASE_URI = ( - f"""sqlite:///{os.path.join(DATA_DIR, "superset.db")}?check_same_thread=false""" -) - +SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "superset.db") # SQLALCHEMY_DATABASE_URI = 'mysql://myapp@localhost/myapp' # SQLALCHEMY_DATABASE_URI = 'postgresql://root:password@localhost/myapp' diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index f355e4ef8cea8..9d6405e91b512 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -1066,24 +1066,6 @@ def handle_cursor(cls, cursor: Any, query: Query, session: Session) -> None: query object""" # TODO: Fix circular import error caused by importing sql_lab.Query - @classmethod - def execute_with_cursor( - cls, cursor: Any, sql: str, query: Query, session: Session - ) -> None: - """ - Trigger execution of a query and handle the resulting cursor. - - For most implementations this just makes calls to `execute` and - `handle_cursor` consecutively, but in some engines (e.g. Trino) we may - need to handle client limitations such as lack of async support and - perform a more complicated operation to get information from the cursor - in a timely manner and facilitate operations such as query stop - """ - logger.debug("Query %d: Running query: %s", query.id, sql) - cls.execute(cursor, sql, async_=True) - logger.debug("Query %d: Handling cursor", query.id) - cls.handle_cursor(cursor, query, session) - @classmethod def extract_error_message(cls, ex: Exception) -> str: return f"{cls.engine} error: {cls._extract_error_message(ex)}" diff --git a/superset/db_engine_specs/trino.py b/superset/db_engine_specs/trino.py index 425137e302e6b..fb03a725bc418 100644 --- a/superset/db_engine_specs/trino.py +++ b/superset/db_engine_specs/trino.py @@ -18,8 +18,6 @@ import contextlib import logging -import threading -import time from typing import Any, TYPE_CHECKING import simplejson as json @@ -153,22 +151,15 @@ def get_tracking_url(cls, cursor: Cursor) -> str | None: @classmethod def handle_cursor(cls, cursor: Cursor, query: Query, session: Session) -> None: - """ - Handle a trino client cursor. - - WARNING: if you execute a query, it will block until complete and you - will not be able to handle the cursor until complete. Use - `execute_with_cursor` instead, to handle this asynchronously. - """ - - # Adds the executed query id to the extra payload so the query can be cancelled - cancel_query_id = cursor.query_id - logger.debug("Query %d: queryId %s found in cursor", query.id, cancel_query_id) - query.set_extra_json_key(key=QUERY_CANCEL_KEY, value=cancel_query_id) - if tracking_url := cls.get_tracking_url(cursor): query.tracking_url = tracking_url + # Adds the executed query id to the extra payload so the query can be cancelled + query.set_extra_json_key( + key=QUERY_CANCEL_KEY, + value=(cancel_query_id := cursor.stats["queryId"]), + ) + session.commit() # if query cancelation was requested prior to the handle_cursor call, but @@ -182,51 +173,6 @@ def handle_cursor(cls, cursor: Cursor, query: Query, session: Session) -> None: super().handle_cursor(cursor=cursor, query=query, session=session) - @classmethod - def execute_with_cursor( - cls, cursor: Any, sql: str, query: Query, session: Session - ) -> None: - """ - Trigger execution of a query and handle the resulting cursor. - - Trino's client blocks until the query is complete, so we need to run it - in another thread and invoke `handle_cursor` to poll for the query ID - to appear on the cursor in parallel. - """ - execute_result: dict[str, Any] = {} - - def _execute(results: dict[str, Any]) -> None: - logger.debug("Query %d: Running query: %s", query.id, sql) - - # Pass result / exception information back to the parent thread - try: - cls.execute(cursor, sql) - results["complete"] = True - except Exception as ex: # pylint: disable=broad-except - results["complete"] = True - results["error"] = ex - - execute_thread = threading.Thread(target=_execute, args=(execute_result,)) - execute_thread.start() - - # Wait for a query ID to be available before handling the cursor, as - # it's required by that method; it may never become available on error. - while not cursor.query_id and not execute_result.get("complete"): - time.sleep(0.1) - - logger.debug("Query %d: Handling cursor", query.id) - cls.handle_cursor(cursor, query, session) - - # Block until the query completes; same behaviour as the client itself - logger.debug("Query %d: Waiting for query to complete", query.id) - while not execute_result.get("complete"): - time.sleep(0.5) - - # Unfortunately we'll mangle the stack trace due to the thread, but - # throwing the original exception allows mapping database errors as normal - if err := execute_result.get("error"): - raise err - @classmethod def prepare_cancel_query(cls, query: Query, session: Session) -> None: if QUERY_CANCEL_KEY not in query.extra: diff --git a/superset/sql_lab.py b/superset/sql_lab.py index 4d71e23d88cee..196b48b1d2155 100644 --- a/superset/sql_lab.py +++ b/superset/sql_lab.py @@ -191,7 +191,7 @@ def get_sql_results( # pylint: disable=too-many-arguments return handle_query_error(ex, query, session) -def execute_sql_statement( # pylint: disable=too-many-arguments +def execute_sql_statement( # pylint: disable=too-many-arguments,too-many-statements sql_statement: str, query: Query, session: Session, @@ -271,7 +271,10 @@ def execute_sql_statement( # pylint: disable=too-many-arguments ) session.commit() with stats_timing("sqllab.query.time_executing_query", stats_logger): - db_engine_spec.execute_with_cursor(cursor, sql, query, session) + logger.debug("Query %d: Running query: %s", query.id, sql) + db_engine_spec.execute(cursor, sql, async_=True) + logger.debug("Query %d: Handling cursor", query.id) + db_engine_spec.handle_cursor(cursor, query, session) with stats_timing("sqllab.query.time_fetching_results", stats_logger): logger.debug( diff --git a/tests/unit_tests/db_engine_specs/test_trino.py b/tests/unit_tests/db_engine_specs/test_trino.py index 1b50a683a0841..963953d18b48e 100644 --- a/tests/unit_tests/db_engine_specs/test_trino.py +++ b/tests/unit_tests/db_engine_specs/test_trino.py @@ -352,7 +352,7 @@ def test_handle_cursor_early_cancel( query_id = "myQueryId" cursor_mock = engine_mock.return_value.__enter__.return_value - cursor_mock.query_id = query_id + cursor_mock.stats = {"queryId": query_id} session_mock = mocker.MagicMock() query = Query() @@ -366,32 +366,3 @@ def test_handle_cursor_early_cancel( assert cancel_query_mock.call_args[1]["cancel_query_id"] == query_id else: assert cancel_query_mock.call_args is None - - -def test_execute_with_cursor_in_parallel(mocker: MockerFixture): - """Test that `execute_with_cursor` fetches query ID from the cursor""" - from superset.db_engine_specs.trino import TrinoEngineSpec - - query_id = "myQueryId" - - mock_cursor = mocker.MagicMock() - mock_cursor.query_id = None - - mock_query = mocker.MagicMock() - mock_session = mocker.MagicMock() - - def _mock_execute(*args, **kwargs): - mock_cursor.query_id = query_id - - mock_cursor.execute.side_effect = _mock_execute - - TrinoEngineSpec.execute_with_cursor( - cursor=mock_cursor, - sql="SELECT 1 FROM foo", - query=mock_query, - session=mock_session, - ) - - mock_query.set_extra_json_key.assert_called_once_with( - key=QUERY_CANCEL_KEY, value=query_id - ) diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py index edc1fd2ec4a5d..29f45eab682a0 100644 --- a/tests/unit_tests/sql_lab_test.py +++ b/tests/unit_tests/sql_lab_test.py @@ -55,8 +55,8 @@ def test_execute_sql_statement(mocker: MockerFixture, app: None) -> None: ) database.apply_limit_to_sql.assert_called_with("SELECT 42 AS answer", 2, force=True) - db_engine_spec.execute_with_cursor.assert_called_with( - cursor, "SELECT 42 AS answer LIMIT 2", query, session + db_engine_spec.execute.assert_called_with( + cursor, "SELECT 42 AS answer LIMIT 2", async_=True ) SupersetResultSet.assert_called_with([(42,)], cursor.description, db_engine_spec) @@ -106,8 +106,10 @@ def test_execute_sql_statement_with_rls( 101, force=True, ) - db_engine_spec.execute_with_cursor.assert_called_with( - cursor, "SELECT * FROM sales WHERE organization_id=42 LIMIT 101", query, session + db_engine_spec.execute.assert_called_with( + cursor, + "SELECT * FROM sales WHERE organization_id=42 LIMIT 101", + async_=True, ) SupersetResultSet.assert_called_with([(42,)], cursor.description, db_engine_spec)