Skip to content

Commit

Permalink
moved clean and replay to orchestrate
Browse files Browse the repository at this point in the history
  • Loading branch information
wangpatrick57 committed Dec 31, 2024
1 parent 0c01d07 commit dde1f14
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 38 deletions.
File renamed without changes.
39 changes: 4 additions & 35 deletions manage/cli.py → orchestrate/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from itertools import chain
from pathlib import Path

import click
from gymlib.workspace import (
DBGymWorkspace,
get_runs_path_from_workspace_path,
Expand All @@ -14,7 +13,7 @@
)


# This is used in test_clean.py. It's defined here to avoid a circular import.
# This is used in test_clean.py. However, we also need it in count_files_in_workspace, so it's defined here to avoid a circular import.
class MockDBGymWorkspace:
def __init__(self, scratchspace_path: Path):
self.dbgym_workspace_path = scratchspace_path
Expand All @@ -24,32 +23,6 @@ def __init__(self, scratchspace_path: Path):
self.dbgym_runs_path = get_runs_path_from_workspace_path(scratchspace_path)


@click.group(name="manage")
def manage_group() -> None:
pass


@click.command("clean")
@click.pass_obj
@click.option(
"--mode",
type=click.Choice(["safe", "aggressive"]),
default="safe",
help='The mode to clean the workspace (default="safe"). "aggressive" means "only keep run_*/ folders referenced by a file in symlinks/". "safe" means "in addition to that, recursively keep any run_*/ folders referenced by any symlinks in run_*/ folders we are keeping."',
)
def manage_clean(dbgym_workspace: DBGymWorkspace, mode: str) -> None:
clean_workspace(dbgym_workspace, mode=mode, verbose=True)


@click.command("count")
@click.pass_obj
def manage_count(dbgym_workspace: DBGymWorkspace) -> None:
num_files = _count_files_in_workspace(dbgym_workspace)
print(
f"The workspace ({dbgym_workspace.dbgym_workspace_path}) has {num_files} total files/dirs/symlinks."
)


def add_symlinks_in_path(
symlinks_stack: list[Path], root_path: Path, processed_symlinks: set[Path]
) -> None:
Expand All @@ -66,7 +39,7 @@ def add_symlinks_in_path(
processed_symlinks.add(file_path)


def _count_files_in_workspace(
def count_files_in_workspace(
dbgym_workspace: DBGymWorkspace | MockDBGymWorkspace,
) -> int:
"""
Expand Down Expand Up @@ -173,15 +146,15 @@ def clean_workspace(

# 3. Go through all children of task_runs/*, deleting any that we weren't told to keep
# It's true that symlinks might link outside of task_runs/*. We'll just not care about those
starting_num_files = _count_files_in_workspace(dbgym_workspace)
starting_num_files = count_files_in_workspace(dbgym_workspace)
if dbgym_workspace.dbgym_runs_path.exists():
for child_path in dbgym_workspace.dbgym_runs_path.iterdir():
if child_path not in task_run_child_paths_to_keep:
if child_path.is_dir():
shutil.rmtree(child_path)
else:
os.remove(child_path)
ending_num_files = _count_files_in_workspace(dbgym_workspace)
ending_num_files = count_files_in_workspace(dbgym_workspace)

if verbose:
logging.info(
Expand All @@ -190,7 +163,3 @@ def clean_workspace(
logging.info(
f"Workspace went from {starting_num_files - ending_num_files} to {starting_num_files}"
)


manage_group.add_command(manage_clean)
manage_group.add_command(manage_count)
34 changes: 34 additions & 0 deletions orchestrate/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import click
from gymlib.workspace import DBGymWorkspace

from orchestrate.clean import clean_workspace, count_files_in_workspace


# Click command group exposed under the name "manage". The `clean` and `count`
# subcommands are attached to it via add_command() at the end of this module.
# NOTE: a "#" comment is used instead of a docstring because click surfaces a
# callback's docstring as its --help text.
@click.group(name="manage")
def manage_group() -> None:
    # No group-level setup is performed; the group only hosts its subcommands.
    pass


# `clean` subcommand: delegates to clean_workspace() with verbose output enabled.
# NOTE: a "#" comment is used instead of a docstring because click surfaces a
# callback's docstring as its --help text.
@click.command("clean")
# pass_obj hands the click context's `obj` to the callback as its first argument
# (presumably a DBGymWorkspace set up by the parent CLI; verify against task.py).
@click.pass_obj
@click.option(
    "--mode",
    type=click.Choice(["safe", "aggressive"]),
    default="safe",
    help='The mode to clean the workspace (default="safe"). "aggressive" means "only keep run_*/ folders referenced by a file in symlinks/". "safe" means "in addition to that, recursively keep any run_*/ folders referenced by any symlinks in run_*/ folders we are keeping."',
)
def manage_clean(dbgym_workspace: DBGymWorkspace, mode: str) -> None:
    # `mode` is limited to "safe" or "aggressive" by the click.Choice above.
    clean_workspace(dbgym_workspace, mode=mode, verbose=True)


# `count` subcommand: prints the total number of entries found in the workspace.
# NOTE: a "#" comment is used instead of a docstring because click surfaces a
# callback's docstring as its --help text.
@click.command("count")
@click.pass_obj
def manage_count(dbgym_workspace: DBGymWorkspace) -> None:
    # Compute the total first, then format the report in one place.
    num_files = count_files_in_workspace(dbgym_workspace)
    summary = f"The workspace ({dbgym_workspace.dbgym_workspace_path}) has {num_files} total files/dirs/symlinks."
    print(summary)


# Register the subcommands on the "manage" group so that they can be invoked
# through it (the group itself is imported by task.py).
manage_group.add_command(manage_clean)
manage_group.add_command(manage_count)
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import unittest

from gymlib.replay import replay
from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager
from gymlib.tuning_artifacts import (
DBMSConfigDelta,
Expand All @@ -12,6 +11,7 @@
from gymlib.workspace import DBGymWorkspace

from benchmark.tpch.constants import DEFAULT_TPCH_SEED
from orchestrate.replay import replay


class ReplayTests(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
verify_structure,
)

from manage.cli import MockDBGymWorkspace, clean_workspace
from orchestrate.clean import MockDBGymWorkspace, clean_workspace

# This is here instead of on `if __name__ == "__main__"` because we often run individual tests, which
# does not go through the `if __name__ == "__main__"` codepath.
Expand Down
2 changes: 1 addition & 1 deletion task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from benchmark.cli import benchmark_group
from dbms.cli import dbms_group
from manage.cli import manage_group
from orchestrate.cli import manage_group

# TODO(phw2): Save commit, git diff, and run command.
# TODO(phw2): Remove write permissions on old run_*/ dirs to enforce that they are immutable.
Expand Down

0 comments on commit dde1f14

Please sign in to comment.