perf(export): export generates unnecessary files content (apache#26765)

Always-prog authored Feb 21, 2024
1 parent 744f68d commit 2e4f6d3
Showing 23 changed files with 271 additions and 168 deletions.
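The shape of the change, in brief: each command's `_export` used to yield `(file_path, file_content)` with the YAML already rendered, so content was serialized even when a consumer never used the entry (related objects can be yielded more than once across nested commands). It now yields `(file_path, content_fn)`, where `content_fn` is a zero-argument callable that renders the YAML on demand. Below is a minimal sketch of the before/after contract; `Model` and `to_yaml` are hypothetical stand-ins, not Superset classes.

```python
from collections.abc import Iterator
from typing import Callable


class Model:  # hypothetical stand-in for Slice / Dashboard / Database / SqlaTable
    name = "example"

    def to_yaml(self) -> str:
        # stands in for the expensive export_to_dict() + yaml.safe_dump() work
        return f"name: {self.name}\n"


def export_eager(model: Model) -> Iterator[tuple[str, str]]:
    # before: the content is rendered whether or not it is ever written
    yield f"charts/{model.name}.yaml", model.to_yaml()


def export_lazy(model: Model) -> Iterator[tuple[str, Callable[[], str]]]:
    # after: rendering is deferred until the consumer calls the second element
    yield f"charts/{model.name}.yaml", lambda: model.to_yaml()


for file_name, content_fn in export_lazy(Model()):
    print(file_name, content_fn())  # the added () is where the work now happens
```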
2 changes: 1 addition & 1 deletion superset/charts/api.py
@@ -793,7 +793,7 @@ def export(self, **kwargs: Any) -> Response:
         try:
             for file_name, file_content in ExportChartsCommand(requested_ids).run():
                 with bundle.open(f"{root}/{file_name}", "w") as fp:
-                    fp.write(file_content.encode())
+                    fp.write(file_content().encode())
         except ChartNotFoundError:
             return self.response_404()
         buf.seek(0)
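On the consumer side the only change is the call site: `file_content` is now invoked at the moment its entry is written into the ZIP. A sketch of the endpoint's write loop under the new contract; `build_bundle` and the stub tuple are illustrative, not Superset's API.

```python
import io
from collections.abc import Iterator
from typing import Callable
from zipfile import ZipFile


def build_bundle(
    files: Iterator[tuple[str, Callable[[], str]]], root: str = "chart_export"
) -> io.BytesIO:
    buf = io.BytesIO()
    with ZipFile(buf, "w") as bundle:
        for file_name, file_content in files:
            with bundle.open(f"{root}/{file_name}", "w") as fp:
                # each callable runs exactly once, when its entry is written
                fp.write(file_content().encode())
    buf.seek(0)
    return buf


bundle = build_bundle(iter([("charts/example.yaml", lambda: "slice_name: example\n")]))
print(len(bundle.getvalue()), "bytes")
```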
4 changes: 2 additions & 2 deletions superset/cli/importexport.py
@@ -83,7 +83,7 @@ def export_dashboards(dashboard_file: Optional[str] = None) -> None:
         with ZipFile(dashboard_file, "w") as bundle:
             for file_name, file_content in ExportDashboardsCommand(dashboard_ids).run():
                 with bundle.open(f"{root}/{file_name}", "w") as fp:
-                    fp.write(file_content.encode())
+                    fp.write(file_content().encode())
     except Exception:  # pylint: disable=broad-except
         logger.exception(
             "There was an error when exporting the dashboards, please check "
@@ -116,7 +116,7 @@ def export_datasources(datasource_file: Optional[str] = None) -> None:
         with ZipFile(datasource_file, "w") as bundle:
             for file_name, file_content in ExportDatasetsCommand(dataset_ids).run():
                 with bundle.open(f"{root}/{file_name}", "w") as fp:
-                    fp.write(file_content.encode())
+                    fp.write(file_content().encode())
     except Exception:  # pylint: disable=broad-except
         logger.exception(
             "There was an error when exporting the datasets, please check "
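Both CLI loops change identically to the API endpoint above: the second element of each yielded tuple is now a callable, invoked only at write time.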
17 changes: 14 additions & 3 deletions superset/commands/chart/export.py
@@ -19,6 +19,7 @@
 import json
 import logging
 from collections.abc import Iterator
+from typing import Callable

 import yaml
@@ -42,10 +43,12 @@ class ExportChartsCommand(ExportModelsCommand):
     not_found = ChartNotFoundError

     @staticmethod
-    def _export(model: Slice, export_related: bool = True) -> Iterator[tuple[str, str]]:
+    def _file_name(model: Slice) -> str:
         file_name = get_filename(model.slice_name, model.id)
-        file_path = f"charts/{file_name}.yaml"
+        return f"charts/{file_name}.yaml"
+
+    @staticmethod
+    def _file_content(model: Slice) -> str:
         payload = model.export_to_dict(
             recursive=False,
             include_parent_ref=False,
@@ -69,7 +72,15 @@ def _export(model: Slice, export_related: bool = True) -> Iterator[tuple[str, str]]:
             payload["dataset_uuid"] = str(model.table.uuid)

         file_content = yaml.safe_dump(payload, sort_keys=False)
-        yield file_path, file_content
+        return file_content
+
+    @staticmethod
+    def _export(
+        model: Slice, export_related: bool = True
+    ) -> Iterator[tuple[str, Callable[[], str]]]:
+        yield ExportChartsCommand._file_name(
+            model
+        ), lambda: ExportChartsCommand._file_content(model)

         if model.table and export_related:
             yield from ExportDatasetsCommand([model.table.id]).run()
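Since related objects are exported through nested `.run()` calls (each chart re-yields its dataset, each dataset its database), the same file path can be produced many times, which is presumably the unnecessary content generation the commit title refers to. With lazy content, a consumer can skip repeats for the cost of a set lookup instead of a full YAML dump. A sketch of such a deduplicating consumer; the helper name is illustrative.

```python
from collections.abc import Iterator
from typing import Callable


def unique_files(
    files: Iterator[tuple[str, Callable[[], str]]]
) -> Iterator[tuple[str, str]]:
    seen: set[str] = set()
    for file_name, content_fn in files:
        if file_name not in seen:
            seen.add(file_name)
            yield file_name, content_fn()  # serialize only the first occurrence
```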
66 changes: 45 additions & 21 deletions superset/commands/dashboard/export.py
@@ -20,7 +20,7 @@
 import logging
 import random
 import string
-from typing import Any, Optional
+from typing import Any, Optional, Callable
 from collections.abc import Iterator

 import yaml
@@ -106,14 +106,13 @@ class ExportDashboardsCommand(ExportModelsCommand):
     dao = DashboardDAO
     not_found = DashboardNotFoundError

-    # pylint: disable=too-many-locals
     @staticmethod
-    def _export(
-        model: Dashboard, export_related: bool = True
-    ) -> Iterator[tuple[str, str]]:
+    def _file_name(model: Dashboard) -> str:
         file_name = get_filename(model.dashboard_title, model.id)
-        file_path = f"dashboards/{file_name}.yaml"
+        return f"dashboards/{file_name}.yaml"
+
+    @staticmethod
+    def _file_content(model: Dashboard) -> str:
         payload = model.export_to_dict(
             recursive=False,
             include_parent_ref=False,
@@ -131,20 +130,6 @@ def _export(
                 logger.info("Unable to decode `%s` field: %s", key, value)
                 payload[new_name] = {}

-        # Extract all native filter datasets and replace native
-        # filter dataset references with uuid
-        for native_filter in payload.get("metadata", {}).get(
-            "native_filter_configuration", []
-        ):
-            for target in native_filter.get("targets", []):
-                dataset_id = target.pop("datasetId", None)
-                if dataset_id is not None:
-                    dataset = DatasetDAO.find_by_id(dataset_id)
-                    if dataset:
-                        target["datasetUuid"] = str(dataset.uuid)
-                    if export_related:
-                        yield from ExportDatasetsCommand([dataset_id]).run()
-
         # the mapping between dashboard -> charts is inferred from the position
         # attribute, so if it's not present we need to add a default config
         if not payload.get("position"):
@@ -163,8 +148,47 @@ def _export(
         payload["version"] = EXPORT_VERSION

         file_content = yaml.safe_dump(payload, sort_keys=False)
-        yield file_path, file_content
+        return file_content
+
+    @staticmethod
+    def _export(
+        model: Dashboard, export_related: bool = True
+    ) -> Iterator[tuple[str, Callable[[], str]]]:
+        yield ExportDashboardsCommand._file_name(
+            model
+        ), lambda: ExportDashboardsCommand._file_content(model)

         if export_related:
             chart_ids = [chart.id for chart in model.slices]
             yield from ExportChartsCommand(chart_ids).run()
+
+        payload = model.export_to_dict(
+            recursive=False,
+            include_parent_ref=False,
+            include_defaults=True,
+            export_uuids=True,
+        )
+        # TODO (betodealmeida): move this logic to export_to_dict once this
+        # becomes the default export endpoint
+        for key, new_name in JSON_KEYS.items():
+            value: Optional[str] = payload.pop(key, None)
+            if value:
+                try:
+                    payload[new_name] = json.loads(value)
+                except (TypeError, json.decoder.JSONDecodeError):
+                    logger.info("Unable to decode `%s` field: %s", key, value)
+                    payload[new_name] = {}
+
+        # Extract all native filter datasets and replace native
+        # filter dataset references with uuid
+        for native_filter in payload.get("metadata", {}).get(
+            "native_filter_configuration", []
+        ):
+            for target in native_filter.get("targets", []):
+                dataset_id = target.pop("datasetId", None)
+                if dataset_id is not None:
+                    dataset = DatasetDAO.find_by_id(dataset_id)
+                    if dataset:
+                        target["datasetUuid"] = str(dataset.uuid)
+                    if export_related:
+                        yield from ExportDatasetsCommand([dataset_id]).run()
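Note that the new `_export` still builds a payload eagerly: discovering which datasets the native filters reference has to happen up front so the related exports can be yielded here, while rendering the dashboard's own YAML stays deferred in `_file_content`. The JSON-decoding walk therefore runs twice per dashboard, apparently a deliberate trade of some repeated dict work for never serializing YAML that may not be written.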
28 changes: 19 additions & 9 deletions superset/commands/database/export.py
@@ -15,10 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 # isort:skip_file
-
+import functools
 import json
 import logging
-from typing import Any
+from typing import Any, Callable
 from collections.abc import Iterator

 import yaml
@@ -56,12 +56,12 @@ class ExportDatabasesCommand(ExportModelsCommand):
     not_found = DatabaseNotFoundError

     @staticmethod
-    def _export(
-        model: Database, export_related: bool = True
-    ) -> Iterator[tuple[str, str]]:
+    def _file_name(model: Database) -> str:
         db_file_name = get_filename(model.database_name, model.id, skip_id=True)
-        file_path = f"databases/{db_file_name}.yaml"
+        return f"databases/{db_file_name}.yaml"
+
+    @staticmethod
+    def _file_content(model: Database) -> str:
         payload = model.export_to_dict(
             recursive=False,
             include_parent_ref=False,
@@ -100,9 +100,18 @@ def _export(
         payload["version"] = EXPORT_VERSION

         file_content = yaml.safe_dump(payload, sort_keys=False)
-        yield file_path, file_content
+        return file_content
+
+    @staticmethod
+    def _export(
+        model: Database, export_related: bool = True
+    ) -> Iterator[tuple[str, Callable[[], str]]]:
+        yield ExportDatabasesCommand._file_name(
+            model
+        ), lambda: ExportDatabasesCommand._file_content(model)

         if export_related:
+            db_file_name = get_filename(model.database_name, model.id, skip_id=True)
             for dataset in model.tables:
                 ds_file_name = get_filename(
                     dataset.table_name, dataset.id, skip_id=True
@@ -118,5 +127,6 @@ def _export(
             payload["version"] = EXPORT_VERSION
             payload["database_uuid"] = str(model.uuid)

-            file_content = yaml.safe_dump(payload, sort_keys=False)
-            yield file_path, file_content
+            yield file_path, functools.partial(  # type: ignore
+                yaml.safe_dump, payload, sort_keys=False
+            )
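The switch to `functools.partial` here, rather than the `lambda` used elsewhere in this commit, matters: this `yield` sits inside the `for dataset in model.tables:` loop, and `payload` is rebound on every iteration. A closure over `payload` would read the variable at call time, so every yielded callable would serialize the last dataset's payload; `partial` freezes the current value instead. (The `# type: ignore` is presumably for `yaml.safe_dump`'s loose type stubs.) A self-contained illustration of the pitfall, with illustrative names:

```python
import functools


def serialize(payload: dict) -> str:
    return str(payload)  # stands in for yaml.safe_dump


payloads = [{"name": "ds1"}, {"name": "ds2"}]

late = [lambda: serialize(payload) for payload in payloads]  # closure: late binding
bound = [functools.partial(serialize, payload) for payload in payloads]  # value frozen

print([fn() for fn in late])   # ["{'name': 'ds2'}", "{'name': 'ds2'}"]: both see the last payload
print([fn() for fn in bound])  # ["{'name': 'ds1'}", "{'name': 'ds2'}"]
```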
25 changes: 18 additions & 7 deletions superset/commands/dataset/export.py
@@ -19,6 +19,7 @@
 import json
 import logging
 from collections.abc import Iterator
+from typing import Callable

 import yaml
@@ -41,15 +42,15 @@ class ExportDatasetsCommand(ExportModelsCommand):
     not_found = DatasetNotFoundError

     @staticmethod
-    def _export(
-        model: SqlaTable, export_related: bool = True
-    ) -> Iterator[tuple[str, str]]:
+    def _file_name(model: SqlaTable) -> str:
         db_file_name = get_filename(
             model.database.database_name, model.database.id, skip_id=True
         )
         ds_file_name = get_filename(model.table_name, model.id, skip_id=True)
-        file_path = f"datasets/{db_file_name}/{ds_file_name}.yaml"
+        return f"datasets/{db_file_name}/{ds_file_name}.yaml"
+
+    @staticmethod
+    def _file_content(model: SqlaTable) -> str:
         payload = model.export_to_dict(
             recursive=True,
             include_parent_ref=False,
@@ -78,10 +79,21 @@ def _export(
         payload["database_uuid"] = str(model.database.uuid)

         file_content = yaml.safe_dump(payload, sort_keys=False)
-        yield file_path, file_content
+        return file_content
+
+    @staticmethod
+    def _export(
+        model: SqlaTable, export_related: bool = True
+    ) -> Iterator[tuple[str, Callable[[], str]]]:
+        yield ExportDatasetsCommand._file_name(
+            model
+        ), lambda: ExportDatasetsCommand._file_content(model)

         # include database as well
         if export_related:
+            db_file_name = get_filename(
+                model.database.database_name, model.database.id, skip_id=True
+            )
             file_path = f"databases/{db_file_name}.yaml"

             payload = model.database.export_to_dict(
@@ -109,5 +121,4 @@ def _export(

             payload["version"] = EXPORT_VERSION

-            file_content = yaml.safe_dump(payload, sort_keys=False)
-            yield file_path, file_content
+            yield file_path, lambda: yaml.safe_dump(payload, sort_keys=False)
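Here, in contrast to the database command above, a plain `lambda` over `payload` is safe: this yield is not inside a loop, so the closure is created at most once per `_export` call and `payload` is never rebound afterwards.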