Skip to content

Commit

Permalink
fix(SQL Lab): hang when result set size is too big (apache#30522)
Browse files Browse the repository at this point in the history
Co-authored-by: aadhikari <[email protected]>
Co-authored-by: Ville Brofeldt <[email protected]>
  • Loading branch information
3 people authored Oct 15, 2024
1 parent 0e9c0f6 commit 6ede327
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 3 deletions.
3 changes: 2 additions & 1 deletion docs/static/resources/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@
"GENERIC_BACKEND_ERROR",
"INVALID_PAYLOAD_FORMAT_ERROR",
"INVALID_PAYLOAD_SCHEMA_ERROR",
"REPORT_NOTIFICATION_ERROR"
"REPORT_NOTIFICATION_ERROR",
"RESULT_TOO_LARGE_ERROR"
],
"type": "string"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ export const ErrorTypeEnum = {
ASYNC_WORKERS_ERROR: 'ASYNC_WORKERS_ERROR',
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR: 'ADHOC_SUBQUERY_NOT_ALLOWED_ERROR',
INVALID_SQL_ERROR: 'INVALID_SQL_ERROR',
RESULT_TOO_LARGE_ERROR: 'RESULT_TOO_LARGE_ERROR',

// Generic errors
GENERIC_COMMAND_ERROR: 'GENERIC_COMMAND_ERROR',
Expand Down
4 changes: 4 additions & 0 deletions superset-frontend/src/setup/setupErrorMessages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,9 @@ export default function setupErrorMessages() {
ErrorTypeEnum.INVALID_SQL_ERROR,
InvalidSQLErrorMessage,
);
errorMessageComponentRegistry.registerValue(
ErrorTypeEnum.RESULT_TOO_LARGE_ERROR,
DatabaseErrorMessage,
);
setupErrorMessagesExtra();
}
3 changes: 3 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,9 @@ class D3TimeFormat(TypedDict, total=False):
SQLLAB_SAVE_WARNING_MESSAGE = None
SQLLAB_SCHEDULE_WARNING_MESSAGE = None

# Max payload size (MB) for SQL Lab to prevent browser hangs with large results.
SQLLAB_PAYLOAD_MAX_MB = None

# Force refresh while auto-refresh in dashboard
DASHBOARD_AUTO_REFRESH_MODE: Literal["fetch", "force"] = "force"
# Dashboard auto refresh intervals
Expand Down
3 changes: 3 additions & 0 deletions superset/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class SupersetErrorType(StrEnum):
ASYNC_WORKERS_ERROR = "ASYNC_WORKERS_ERROR"
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR = "ADHOC_SUBQUERY_NOT_ALLOWED_ERROR"
INVALID_SQL_ERROR = "INVALID_SQL_ERROR"
RESULT_TOO_LARGE_ERROR = "RESULT_TOO_LARGE_ERROR"

# Generic errors
GENERIC_COMMAND_ERROR = "GENERIC_COMMAND_ERROR"
Expand Down Expand Up @@ -151,6 +152,7 @@ class SupersetErrorType(StrEnum):
1036: _("The database was deleted."),
1037: _("Custom SQL fields cannot contain sub-queries."),
1040: _("The submitted payload failed validation."),
1041: _("The result size exceeds the allowed limit."),
}


Expand Down Expand Up @@ -190,6 +192,7 @@ class SupersetErrorType(StrEnum):
SupersetErrorType.DATABASE_NOT_FOUND_ERROR: [1011, 1036],
SupersetErrorType.CONNECTION_DATABASE_TIMEOUT: [1001, 1009],
SupersetErrorType.MARSHMALLOW_ERROR: [1040],
SupersetErrorType.RESULT_TOO_LARGE_ERROR: [1041],
}


Expand Down
36 changes: 36 additions & 0 deletions superset/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# pylint: disable=consider-using-transaction
import dataclasses
import logging
import sys
import uuid
from contextlib import closing
from datetime import datetime
Expand Down Expand Up @@ -78,6 +79,7 @@
SQLLAB_CTAS_NO_LIMIT = config["SQLLAB_CTAS_NO_LIMIT"]
log_query = config["QUERY_LOGGER"]
logger = logging.getLogger(__name__)
BYTES_IN_MB = 1024 * 1024


class SqlLabException(Exception):
Expand Down Expand Up @@ -531,6 +533,7 @@ def execute_sql_statements(
log_params,
apply_ctas,
)

except SqlLabQueryStoppedException:
payload.update({"status": QueryStatus.STOPPED})
return payload
Expand Down Expand Up @@ -601,6 +604,22 @@ def execute_sql_statements(
serialized_payload = _serialize_payload(
payload, cast(bool, results_backend_use_msgpack)
)

# Check the size of the serialized payload
if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
serialized_payload_size = sys.getsizeof(serialized_payload)
max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB

if serialized_payload_size > max_bytes:
logger.info("Result size exceeds the allowed limit.")
raise SupersetErrorException(
SupersetError(
message=f"Result size ({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
level=ErrorLevel.ERROR,
)
)

cache_timeout = database.cache_timeout
if cache_timeout is None:
cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]
Expand Down Expand Up @@ -635,6 +654,23 @@ def execute_sql_statements(
"expanded_columns": expanded_columns,
}
)
# Check the size of the serialized payload (opt-in logic for return_results)
if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
serialized_payload = _serialize_payload(
payload, cast(bool, results_backend_use_msgpack)
)
serialized_payload_size = sys.getsizeof(serialized_payload)
max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB

if serialized_payload_size > max_bytes:
logger.info("Result size exceeds the allowed limit.")
raise SupersetErrorException(
SupersetError(
message=f"Result size ({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
level=ErrorLevel.ERROR,
)
)
return payload

return None
Expand Down
115 changes: 113 additions & 2 deletions tests/unit_tests/sql_lab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
# pylint: disable=import-outside-toplevel, invalid-name, unused-argument, too-many-locals

import json
from unittest import mock
from uuid import UUID

import pytest
import sqlparse
from freezegun import freeze_time
from pytest_mock import MockerFixture
Expand All @@ -27,9 +29,9 @@
from superset import db
from superset.common.db_query_status import QueryStatus
from superset.errors import ErrorLevel, SupersetErrorType
from superset.exceptions import OAuth2Error
from superset.exceptions import OAuth2Error, SupersetErrorException
from superset.models.core import Database
from superset.sql_lab import get_sql_results
from superset.sql_lab import execute_sql_statements, get_sql_results
from superset.utils.core import override_user
from tests.unit_tests.models.core_test import oauth2_client_info

Expand Down Expand Up @@ -125,6 +127,115 @@ def test_execute_sql_statement_with_rls(
SupersetResultSet.assert_called_with([(42,)], cursor.description, db_engine_spec)


@mock.patch.dict(
    "superset.sql_lab.config",
    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # Set the desired config value for testing
)
def test_execute_sql_statement_exceeds_payload_limit(mocker: MockerFixture) -> None:
    """
    Test that `execute_sql_statements` raises a ``SupersetErrorException``
    carrying ``RESULT_TOO_LARGE_ERROR`` when the serialized result payload
    exceeds the configured ``SQLLAB_PAYLOAD_MAX_MB`` limit.
    """

    # Mock the query object and database so the function can run end-to-end
    # without touching a real database.
    query = mocker.MagicMock()
    query.limit = 1
    query.database = mocker.MagicMock()
    query.database.db_engine_spec.is_select_query.return_value = True
    query.database.cache_timeout = 100
    query.status = "RUNNING"
    query.select_as_cta = False
    query.database.allow_run_async = True

    # Mock get_query to return our mocked query object
    mocker.patch("superset.sql_lab.get_query", return_value=query)

    # Simulate a serialized payload of 100 MB, which is above the 50 MB limit.
    mocker.patch("sys.getsizeof", return_value=100000000)  # 100 MB

    # Mock _serialize_payload so no real serialization happens.
    mocker.patch(
        "superset.sql_lab._serialize_payload", return_value="serialized_payload"
    )

    # Mock db.session.refresh to avoid AttributeError during session refresh
    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)

    # Mock the results backend to avoid "Results backend is not configured" error
    mocker.patch("superset.sql_lab.results_backend", return_value=True)

    # The oversized payload must surface as RESULT_TOO_LARGE_ERROR — asserting
    # the error type (not just the exception class) guards against an unrelated
    # SupersetErrorException making this test pass.
    with pytest.raises(SupersetErrorException) as excinfo:
        execute_sql_statements(
            query_id=1,
            rendered_query="SELECT 42 AS answer",
            return_results=True,  # Simulate that results are being returned
            store_results=True,
            start_time=None,
            expand_data=False,
            log_params={},
        )
    assert (
        excinfo.value.error.error_type == SupersetErrorType.RESULT_TOO_LARGE_ERROR
    )


@mock.patch.dict(
    "superset.sql_lab.config",
    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # Set the desired config value for testing
)
def test_execute_sql_statement_within_payload_limit(mocker: MockerFixture) -> None:
    """
    Test that `execute_sql_statements` completes without raising when the
    serialized result payload size is within ``SQLLAB_PAYLOAD_MAX_MB``.
    """

    # Mock the query object and database so the function can run end-to-end
    # without touching a real database.
    query = mocker.MagicMock()
    query.limit = 1
    query.database = mocker.MagicMock()
    query.database.db_engine_spec.is_select_query.return_value = True
    query.database.cache_timeout = 100
    query.status = "RUNNING"
    query.select_as_cta = False
    query.database.allow_run_async = True

    # Mock get_query to return our mocked query object
    mocker.patch("superset.sql_lab.get_query", return_value=query)

    # Simulate a serialized payload of 10 MB, safely within the 50 MB limit.
    mocker.patch("sys.getsizeof", return_value=10000000)  # 10 MB (within limit)

    # Mock _serialize_payload so no real serialization happens.
    mocker.patch(
        "superset.sql_lab._serialize_payload", return_value="serialized_payload"
    )

    # Mock db.session.refresh to avoid AttributeError during session refresh
    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)

    # Mock the results backend to avoid "Results backend is not configured" error
    mocker.patch("superset.sql_lab.results_backend", return_value=True)

    # No try/except wrapper needed: if a SupersetErrorException (or anything
    # else) is raised unexpectedly, pytest fails the test with the full
    # traceback, which is more informative than pytest.fail with a message.
    execute_sql_statements(
        query_id=1,
        rendered_query="SELECT 42 AS answer",
        return_results=True,  # Simulate that results are being returned
        store_results=True,
        start_time=None,
        expand_data=False,
        log_params={},
    )


def test_sql_lab_insert_rls_as_subquery(
mocker: MockerFixture,
session: Session,
Expand Down

0 comments on commit 6ede327

Please sign in to comment.