Added Functionality to export any dashboards-as-code into CSV (#269)
### Functionality

- [X] added `DashboardMetadata.export_to_zipped_csv`, which runs each dashboard query and exports the results to CSV files bundled into a single ZIP archive (see the usage sketch after the checklists below).

### Tests

- [X] manually tested
- [X] added unit tests
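
For context, here is a minimal usage sketch of the new API. The warehouse id, dashboard folder, and export directory are illustrative assumptions, not part of this change:

```python
from pathlib import Path

from databricks.sdk import WorkspaceClient

from databricks.labs.lsql.backends import StatementExecutionBackend
from databricks.labs.lsql.dashboards import DashboardMetadata

ws = WorkspaceClient()
sql_backend = StatementExecutionBackend(ws, warehouse_id="...")  # assumed warehouse id

# Load a dashboards-as-code folder of .sql files.
metadata = DashboardMetadata.from_path(Path("dashboards/ucx"))  # illustrative folder

# Run every query tile and bundle the results into one ZIP of CSVs.
export_dir = Path("/tmp/exports")
export_dir.mkdir(parents=True, exist_ok=True)
zip_path = metadata.export_to_zipped_csv(sql_backend, export_dir)
print(zip_path)  # /tmp/exports/export_to_zipped_csv.zip
```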
jgarciaf106 authored Sep 11, 2024
1 parent e21699f commit 5dfaaa0
Showing 2 changed files with 59 additions and 0 deletions.
33 changes: 33 additions & 0 deletions src/databricks/labs/lsql/dashboards.py
@@ -1,5 +1,6 @@
import argparse
import collections
import csv
import dataclasses
import json
import logging
@@ -11,15 +12,18 @@
from collections.abc import Callable, Iterable, Sized
from dataclasses import dataclass
from enum import Enum, unique
from io import BytesIO, StringIO
from pathlib import Path
from typing import TypeVar
from zipfile import ZipFile

import sqlglot
import yaml
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import Dashboard as SDKDashboard
from databricks.sdk.service.workspace import ExportFormat

from databricks.labs.lsql.backends import SqlBackend
from databricks.labs.lsql.lakeview import (
    ColumnType,
    ControlEncoding,
@@ -893,6 +897,35 @@ def _from_dashboard_folder(cls, folder: Path) -> "DashboardMetadata":
            tiles.append(tile)
        return cls(display_name=folder.name, _tiles=tiles)

    def export_to_zipped_csv(self, sql_backend: SqlBackend, export_path: Path) -> Path:
        """Export the dashboard queries to CSV files directly into a ZIP archive."""
        zip_export = export_path / "export_to_zipped_csv.zip"

        with ZipFile(zip_export, mode="w") as zip_file:
            for tile in self.tiles:
                if tile.metadata.is_query():
                    rows = sql_backend.fetch(tile.content)

                    # Skip queries that returned no rows.
                    if not rows:
                        continue

                    buffer = StringIO()
                    writer = None

                    for row in rows:
                        if writer is None:
                            # Derive the CSV header from the first row's columns.
                            headers = row.asDict().keys()
                            writer = csv.DictWriter(buffer, fieldnames=headers)
                            writer.writeheader()
                        writer.writerow(row.asDict())

                    bytes_buffer = BytesIO(buffer.getvalue().encode("utf-8"))

                    # One CSV entry per query tile, named after the tile id.
                    with zip_file.open(f"{tile.metadata.id}.csv", "w") as csv_file:
                        csv_file.write(bytes_buffer.getvalue())

        return zip_export


class Dashboards:
    def __init__(self, ws: WorkspaceClient):
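The exporter builds the `csv.DictWriter` lazily on the first fetched row, since the column names are only known once a result arrives, and query tiles with no rows are skipped so the archive contains no header-only files. A standalone sketch of that pattern, with made-up rows:

```python
import csv
from io import StringIO
from zipfile import ZipFile

rows = [
    {"location": "s3://bucket1/folder1", "table_count": 1},
    {"location": "gcp://folder1", "table_count": 2},
]

buffer = StringIO()
writer = None
for row in rows:
    if writer is None:
        # The header comes from the first row, so an empty result set
        # never produces a header-only CSV.
        writer = csv.DictWriter(buffer, fieldnames=row.keys())
        writer.writeheader()
    writer.writerow(row)

with ZipFile("export.zip", mode="w") as zip_file:
    # ZipFile.open in write mode expects bytes, hence the UTF-8 encode.
    with zip_file.open("query.csv", "w") as csv_file:
        csv_file.write(buffer.getvalue().encode("utf-8"))
```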
26 changes: 26 additions & 0 deletions tests/unit/test_dashboards.py
@@ -11,6 +11,8 @@
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import Dashboard as SDKDashboard

from databricks.labs.lsql.backends import MockBackend
from databricks.labs.lsql.core import Row
from databricks.labs.lsql.dashboards import (
    BaseHandler,
    DashboardMetadata,
@@ -1591,3 +1593,27 @@ def test_dashboards_get_dashboard_url():
    ws.config.host = "https://adb-0123456789.12.azuredatabricks.net"
    dashboard_url = Dashboards(ws).get_url("1234")
    assert dashboard_url == dashboard_url_expected


def test_dashboards_export_to_zipped_csv(tmp_path):
    query = {
        "SELECT\n one\nFROM ucx.external_locations": [
            Row(location="s3://bucket1/folder1", table_count=1),
            Row(location="abfss://[email protected]/folder1", table_count=1),
            Row(location="gcp://folder1", table_count=2),
        ]
    }

    mock_backend = MockBackend(rows=query)

    (tmp_path / "external_locations.sql").write_text(list(query.keys())[0])
    export_path = tmp_path / "export"
    export_path.mkdir(parents=True, exist_ok=True)

    dash = DashboardMetadata.from_path(tmp_path)
    dash = dash.replace_database(catalog="hive_metastore", database="ucx")
    dash.export_to_zipped_csv(mock_backend, export_path)

    assert len(list(export_path.glob("export_to_zipped_csv.zip"))) == 1
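
The assertion only checks that the archive exists. A hypothetical follow-up check (not part of this commit) could open the ZIP and verify the CSV payload:

```python
from zipfile import ZipFile

with ZipFile(export_path / "export_to_zipped_csv.zip") as zip_file:
    names = zip_file.namelist()
    assert len(names) == 1  # one CSV per query tile
    header = zip_file.read(names[0]).decode("utf-8").splitlines()[0]
    assert header == "location,table_count"
```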
