Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Remove unused aspects assets on import #950

Merged
merged 11 commits into from
Oct 4, 2024
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ asset by editing the asset in Superset and selecting "Save As" to save it to a n
# Note: If you are using custom assets you will need to rebuild your aspects-superset
# image on your local machine with `tutor images build aspects-superset --no-cache`.

Assets (charts/datasets) created for Aspects that are no longer used can be listed in
`aspects_asset_list.yaml`. These assets & any translated assets created from them,
are deleted from Superset during `init` (specifically `import-assets`). The corresponding
YAML files are deleted during `import_superset_zip` or and `check_superset_assets`.

Sharing Charts and Dashboards
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
173 changes: 95 additions & 78 deletions tutoraspects/asset_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,18 @@

FILE_NAME_ATTRIBUTE = "_file_name"

PLUGIN_PATH = os.path.dirname(os.path.abspath(__file__))
PLUGIN_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "templates", "aspects"
)
ASPECT_ASSET_LIST = os.path.join(
PLUGIN_PATH,
"apps",
"superset",
"pythonpath",
"aspects_asset_list.yaml",
)
ASSETS_PATH = os.path.join(
PLUGIN_PATH,
"templates",
"aspects",
"build",
"aspects-superset",
"openedx-assets",
Expand Down Expand Up @@ -465,116 +472,126 @@ def deduplicate_superset_assets(echo):
echo()
echo(click.style(f"{err} errors found!", fg="red"))

echo("Deduplication complete.")


def check_asset_names(echo):
"""
Warn about any duplicate asset names.
"""
echo("Looking for duplicate names...")
warn = 0

names = set()
for file_name, asset in _get_asset_files():
for k in ("slice_name", "dashboard_title", "database_name"):
if k in asset:
if asset[k] in names:
warn += 1
echo(
f"WARNING: Duplicate name {asset[k]} in {file_name}, this "
f"could confuse users, consider changing it."
)
names.add(asset[k])
break

echo(f"{warn} duplicate names detected.")
echo("De-duplication complete.")


def _get_all_chart_dataset_uuids():
def _get_all_uuids():
"""
Return the UUIDs of all datasets and charts in our file assets.
Return the UUIDs of all assets.
"""
all_dataset_uuids = {}
all_chart_uuids = {}
all_uuids = {"charts": {}, "datasets": {}}

# First get all known uuid's
for _, asset in _get_asset_files():
for file_path, asset in _get_asset_files():
if "slice_name" in asset:
all_chart_uuids[asset["uuid"]] = asset["slice_name"]
all_uuids["charts"][asset["uuid"]] = {
"name": asset["_file_name"],
"file_path": file_path,
}
elif "table_name" in asset:
all_dataset_uuids[asset["uuid"]] = asset["table_name"]
all_uuids["datasets"][asset["uuid"]] = {
"name": asset["table_name"],
"file_path": file_path,
}

return all_dataset_uuids, all_chart_uuids
return all_uuids


def _get_used_chart_dataset_uuids():
def _get_used_uuids():
"""
Return the UUIDs of all datasets and charts actually used in our file assets.
"""
used_dataset_uuids = set()
used_chart_uuids = set()
used_uuids = {"charts": set(), "datasets": set()}

for _, asset in _get_asset_files():
if "dashboard_title" in asset:
filters = asset["metadata"].get("native_filter_configuration", [])

for filter_config in filters:
for filter_dataset in filter_config.get("target", {}).get(
"datasetUuid", []
):
used_dataset_uuids.add(filter_dataset)
for item in filter_config.get("targets", {}):
if item.get("datasetUuid"):
used_uuids["datasets"].add(item.get("datasetUuid"))

for pos in asset["position"]:
if pos.startswith("CHART-"):
slice_uuid = asset["position"][pos]["meta"].get("uuid")

if slice_uuid:
used_chart_uuids.add(slice_uuid)
used_uuids["charts"].add(asset["position"][pos]["meta"].get("uuid"))

if "slice_name" in asset:
dataset_uuid = asset["dataset_uuid"]
used_dataset_uuids.add(dataset_uuid)
used_uuids["datasets"].add(asset["dataset_uuid"])

return used_dataset_uuids, used_chart_uuids
return used_uuids


def check_orphan_assets(echo):
def _find_unused_assets():
"""
Warn about any potentially unused assets.
Find potentially unused assets.
UUIDs listed as 'ignored' in aspects_asset_list.yaml are owned
by Aspects and will be removed from the list of potential unused assets.
"""
echo("Looking for potentially orphaned assets...")
all_uuids = _get_all_uuids()
used_uuids = _get_used_uuids()

all_dataset_uuids, all_chart_uuids = _get_all_chart_dataset_uuids()
used_dataset_uuids, used_chart_uuids = _get_used_chart_dataset_uuids()
# Remove uuids from 'all' list that are in used list
for asset_type, uuids in used_uuids.items():
for uuid in uuids or []:
try:
all_uuids[asset_type].pop(uuid)
except KeyError:
click.echo(
click.style(
f"WARNING: {asset_type} {uuid} used but not found!", fg="red"
)
)

for k in used_dataset_uuids:
try:
all_dataset_uuids.pop(k)
except KeyError:
click.echo(
click.style(f"WARNING: Dataset {k} used nut not found!", fg="red")
)
# Remove uuids from 'all' list that are in ignored yaml
with open(ASPECT_ASSET_LIST, "r", encoding="utf-8") as file:
aspects_assets = yaml.safe_load(file)

# Remove the "Query performance" chart from the list, it's needed for
# the performance_metrics script, but not in any dashboard.
all_chart_uuids.pop("bb13bb31-c797-4ed3-a7f9-7825cc6dc482", None)
ignored_uuids = aspects_assets.get("ignored_uuids")
for asset_type in ignored_uuids:
for uuid in ignored_uuids[asset_type] or []:
all_uuids[asset_type].pop(uuid, None)

for k in used_chart_uuids:
try:
all_chart_uuids.pop(k)
except KeyError:
click.echo(click.style(f"WARNING: Chart {k} used nut not found!", fg="red"))
return all_uuids

echo()

if all_dataset_uuids:
echo(click.style("Potentially unused datasets detected:", fg="yellow"))
echo("\n".join(sorted(all_dataset_uuids.values())))
def delete_aspects_unused_assets(echo):
"""
Warn about any potentially unused assets AND
delete any unused chart and dataset yamls whose UUIDs are listed in
aspects_assets_list.yaml - these are owned by Aspects and can safely
be deleted.
"""
unused_uuids = _find_unused_assets()

count_unused_uuids = sum(len(uuids) for asset_type, uuids in unused_uuids.items())
if count_unused_uuids:
with open(ASPECT_ASSET_LIST, "r", encoding="utf-8") as file:
aspects_assets = yaml.safe_load(file)

unused_aspects_uuids = aspects_assets.get("unused_uuids")
for asset_type in unused_aspects_uuids:
for uuid in unused_aspects_uuids[asset_type] or []:
if uuid in unused_uuids[asset_type]:
data = unused_uuids[asset_type][uuid]
echo(
f"Deleting unused {asset_type} yaml {data.get('name')} (UUID: {uuid})"
)
os.remove(data.get("file_path"))
unused_uuids[asset_type].pop(uuid)

new_count_unused_uuids = sum(
len(uuids) for asset_type, uuids in unused_uuids.items()
)

if all_chart_uuids:
echo(click.style("Potentially unused charts detected:", fg="yellow"))
echo("\n".join(sorted(all_chart_uuids.values())))
if new_count_unused_uuids:
echo(click.style("Potentially unused assets detected:", fg="yellow"))
echo(
click.style(
"Add the UUIDs to aspects_asset_list.yaml to be deleted", fg="green"
)
)

if not all_dataset_uuids and not all_chart_uuids:
echo(f"{len(all_chart_uuids) + len(all_dataset_uuids)} orphans detected.")
for asset_type, uuids in unused_uuids.items():
for uuid, data in uuids.items():
echo(f'{asset_type} {data.get("name")} (UUID: {uuid})')
13 changes: 3 additions & 10 deletions tutoraspects/commands_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
from tutoraspects.asset_command_helpers import (
ASSETS_PATH,
SupersetCommandError,
check_asset_names,
check_orphan_assets,
deduplicate_superset_assets,
import_superset_assets,
delete_aspects_unused_assets,
)


Expand Down Expand Up @@ -366,9 +365,7 @@ def serialize_zip(file, base_assets_path):

click.echo()
deduplicate_superset_assets(click.echo)

click.echo()
check_asset_names(click.echo)
delete_aspects_unused_assets(click.echo)

click.echo()
click.echo("Asset merge complete!")
Expand All @@ -387,11 +384,7 @@ def check_superset_assets():
Deduplicate assets by UUID, and check for duplicate asset names.
"""
deduplicate_superset_assets(click.echo)

click.echo()
check_asset_names(click.echo)
click.echo()
check_orphan_assets(click.echo)
delete_aspects_unused_assets(click.echo)

click.echo()
click.echo(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
unused_uuids:
datasets:
charts:
- a433e3cc-8ed5-454a-8b17-5dd75cfc84e4 # Course Info
- e0098cfe-a312-4c49-8efd-7e74256b6ea4 # Course info
- d3ae0546-37a8-4841-a57b-8087a6c33049 # Evolution of engagement
- 2f2af8b0-94ae-4300-b71f-3bd7f9fc127c # Enrollment counts
- bd37be7f-6672-4dca-80ae-a54f69d169da # Enrollment counts
- dde44a03-649f-4d77-990b-a95be204e1ba # Learner performance
- 4e48b8f9-e757-4263-a9d7-d18018620a24 # Learner performance
- 4250c976-a9b7-43ff-b5ad-8dd00a5acef7 # Learner performance breakdown
- 9c3f7291-1bd9-4b2f-abc0-472aad3aff06 # Learner performance breakdown
- 8b0535a8-a43f-49bf-9d50-439a16bd3f74 # Video engagement

ignored_uuids:
datasets:
charts:
- bb13bb31-c797-4ed3-a7f9-7825cc6dc482 # "Query performance" needed for the performance_metrics script
Loading
Loading