Skip to content

Commit

Permalink
feat: Improve performance of add dataset page (#21)
Browse files Browse the repository at this point in the history
* Create celery task to warm up tables in a database schema

* Use AntdSelect for table selection in add dataset page

* fix linter errors

* Use feature flag to control change

* Fix imports to original version
  • Loading branch information
kgopal492 authored and zhangvi7 committed Aug 1, 2024
1 parent 4064687 commit 714fae9
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 4 deletions.
6 changes: 3 additions & 3 deletions superset/tasks/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,8 @@ class DashboardMetadataStrategy(Strategy): # pylint: disable=too-few-public-met
Warm up charts in dashboards with json_metadata field containing matching
`cache_warmup_schedule` value
Note: This strategy can be deprecated once TAGGING_SYSTEM is a completed production feature.
Then, we can use DashboardTagsStrategy in place of this strategy.
Note: This strategy can be deprecated once TAGGING_SYSTEM is a completed production
feature. Then, we can use DashboardTagsStrategy in place of this strategy.
CELERYBEAT_SCHEDULE = {
'cache-warmup-hourly': {
Expand All @@ -242,7 +242,7 @@ def get_payloads(self) -> list[dict[str, int]]:
# add dashboards that have cache warmup configured
cache_configured_dashboards = (
session.query(Dashboard)
.filter(Dashboard.json_metadata.like(f'%"cache_warmup_schedule"%'))
.filter(Dashboard.json_metadata.like('%"cache_warmup_schedule"%'))
.all()
)
cache_configured_dashboards = [
Expand Down
2 changes: 1 addition & 1 deletion superset/tasks/celery_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

# Need to import late, as the celery_app will have been setup by "create_app()"
# pylint: disable=wrong-import-position, unused-import
from . import cache, scheduler # isort:skip
from . import cache, scheduler, database # isort:skip

# Export the celery app globally for Celery (as run on the cmd line) to find
app = celery_app
Expand Down
61 changes: 61 additions & 0 deletions superset/tasks/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import logging

from celery.utils.log import get_task_logger

from superset import db
from superset.exceptions import SupersetException
from superset.extensions import celery_app
from superset.models.core import Database

# Celery task logger for this module; the level is set to INFO so that the
# progress messages logged by the tasks below via logger.info are not filtered
# out by this logger.
logger = get_task_logger(__name__)
logger.setLevel(logging.INFO)


@celery_app.task(name="db_tables_cache_warm_up")
def db_tables_cache_warm_up(database_id: str, schema_name: str) -> None:
    """
    Warm up the table-name and view-name caches for one schema of a database.

    Both lookups are issued with ``force=True`` so the cached values are
    refreshed rather than read back.

    Example schedule::

        beat_schedule = {
            'db_tables_cache_warm_up': {
                'task': 'db_tables_cache_warm_up',
                'schedule': crontab(minute='*/10', hour='*'),  # every 10 minutes
                'kwargs': {'database_id': 1, 'schema_name': 'public'},
            },
        }

    :param database_id: primary key of the ``Database`` row to warm up
    :param schema_name: name of the schema whose tables and views are listed
    :returns: ``None``; a missing database or a ``SupersetException`` is
        logged and the task ends without raising
    """
    session = db.create_scoped_session()
    # %s (not %i) is used for database_id throughout: the id may arrive as a
    # string, and %i would make the log record fail to format.
    logger.info(
        "Warming up database table cache for database_id: %s, schema_name: %s",
        database_id,
        schema_name,
    )
    try:
        database = session.query(Database).filter_by(id=database_id).one_or_none()
        if not database:
            logger.error("Database not found, database_id: %s", database_id)
            # Nothing to warm up; bail out instead of dereferencing None below.
            return

        database.get_all_table_names_in_schema(
            schema=schema_name,
            force=True,
            cache=database.table_cache_enabled,
            cache_timeout=database.table_cache_timeout,
        )
        database.get_all_view_names_in_schema(
            schema=schema_name,
            force=True,
            cache=database.table_cache_enabled,
            cache_timeout=database.table_cache_timeout,
        )
        logger.info(
            "Database tables cache warm up succeeded for database_id: %s, "
            "schema_name: %s",
            database_id,
            schema_name,
        )
    except SupersetException as ex:
        logger.exception(
            "Superset exception for db_tables_cache_warm_up job database_id: %s, "
            "schema_name: %s, message: %s",
            database_id,
            schema_name,
            ex.message,
        )
    finally:
        # Release the scoped session so repeated scheduled runs do not
        # accumulate open sessions/connections.
        session.remove()

0 comments on commit 714fae9

Please sign in to comment.