diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 278beb852..4a987ee4a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,88 +8,134 @@ on: pull_request: jobs: - test: - name: Test - runs-on: ubuntu-latest + # test: + # name: Test + # runs-on: ubuntu-latest + # steps: + # - name: Checkout sources + # uses: actions/checkout@v2 + # + # - name: Install stable toolchain + # uses: actions-rs/toolchain@v1 + # with: + # profile: minimal + # toolchain: 1.76.0 + # override: true + # components: clippy, rustfmt + # + # - uses: Swatinem/rust-cache@v1 + # + # - name: Check package versions + # run: python scripts/check_package_versions.py + # + # - name: Build + # uses: actions-rs/cargo@v1 + # with: + # command: build + # args: --all --all-targets + # + # - name: Test + # uses: actions-rs/cargo@v1 + # with: + # command: test + # args: --all + # + # - name: Setup Python + # uses: actions/setup-python@v1 + # with: + # python-version: "3.8" + # + # - name: Install Python dependencies + # run: | + # python -m pip install --upgrade pip wheel setuptools + # python -m pip install -r tests/requirements.txt + # + # - name: Build Python binding + # run: | + # maturin build --manifest-path crates/pyhq/Cargo.toml --out wheels + # WHEEL=`realpath wheels/*.whl` + # python -m pip install $WHEEL[all] + # + # - name: Test Python + # id: python_test + # run: python -m pytest tests -n4 + # + # - name: Archive test artifacts + # if: always() && steps.python_test.outcome == 'failure' + # run: tar -cvf artifacts.tar /tmp/pytest-* + # + # - name: Upload test artifacts + # uses: actions/upload-artifact@v2 + # if: always() && steps.python_test.outcome == 'failure' + # with: + # name: pytest artifacts + # path: artifacts.tar + # + # - name: Lint Rust + # uses: actions-rs/cargo@v1 + # with: + # command: clippy + # args: --all -- -D warnings + # + # - name: Check Rust formatting + # uses: actions-rs/cargo@v1 + # with: + # command: fmt + # args: --all -- --check + # + # - name: Lint Python + # run: python -m ruff check + # + # - name: Check Python formatting + # run: python -m ruff format --check + # + # - name: Build docs + # run: | + # python -m pip install -r docs/requirements.txt + # mkdocs build + + compatibility_mode: + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + # - os: windows-latest + # target: x86_64-pc-windows-msvc + - os: macos-latest + target: x86_64-apple-darwin steps: - name: Checkout sources - uses: actions/checkout@v2 - + uses: actions/checkout@v4 - name: Install stable toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: 1.76.0 + toolchain: stable override: true - components: clippy, rustfmt - uses: Swatinem/rust-cache@v1 - - - name: Check package versions - run: python scripts/check_package_versions.py - - - name: Build - uses: actions-rs/cargo@v1 with: - command: build - args: --all --all-targets + key: ${{ matrix.os }} - - name: Test - uses: actions-rs/cargo@v1 - with: - command: test - args: --all + - name: Build HyperQueue + run: cargo build --no-default-features + - name: Run HyperQueue + run: cargo run --no-default-features -- --version - name: Setup Python uses: actions/setup-python@v1 with: python-version: "3.8" - - name: Install Python dependencies run: | python -m pip install --upgrade pip wheel setuptools python -m pip install -r tests/requirements.txt + - name: Install coreutils + run: brew install coreutils + - name: Build Python binding run: | maturin build --manifest-path crates/pyhq/Cargo.toml --out wheels WHEEL=`realpath wheels/*.whl` python -m pip install $WHEEL[all] - - - name: Test Python - id: python_test - run: python -m pytest tests -n4 - - - name: Archive test artifacts - if: always() && steps.python_test.outcome == 'failure' - run: tar -cvf artifacts.tar /tmp/pytest-* - - - name: Upload test artifacts - uses: actions/upload-artifact@v2 - if: always() && steps.python_test.outcome == 'failure' - with: - name: pytest artifacts - path: artifacts.tar - - - name: Lint Rust - uses: actions-rs/cargo@v1 - with: - command: clippy - args: --all -- -D warnings - - - name: Check Rust formatting - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --all -- --check - - - name: Lint Python - run: python -m ruff check - - - name: Check Python formatting - run: python -m ruff format --check - - - name: Build docs - run: | - python -m pip install -r docs/requirements.txt - mkdocs build diff --git a/benchmarks/ligen_benchmarks.py b/benchmarks/ligen_benchmarks.py index 0274d4373..a42c87e29 100644 --- a/benchmarks/ligen_benchmarks.py +++ b/benchmarks/ligen_benchmarks.py @@ -12,6 +12,7 @@ import pandas as pd from ligate.ligen.expansion import SubmittedExpansion +from src.build.hq import Profile from hyperqueue import Client, Job from hyperqueue.job import SubmittedJob from src.analysis.chart import render_chart_to_png @@ -42,13 +43,26 @@ cli = create_cli() +@dataclasses.dataclass +class LigenConfig: + container_path: Path + smi_path: Path + max_molecules: int + screening_threads: int + + def __post_init__(self): + assert self.container_path.is_file() + assert self.smi_path.is_file() + + class LigenHQWorkload(Workload): - def __init__(self, smi_path: Path, max_molecules: int, screening_threads: int): - self.smi_path = smi_path - self.max_molecules = max_molecules - self.screening_threads = min(max_molecules, screening_threads) - if self.screening_threads != screening_threads: - logging.warning(f"Setting screening threads to {self.max_molecules}, because there won't be more work") + def __init__(self, config: LigenConfig): + self.config = config + self.screening_threads = min(self.config.max_molecules, self.config.screening_threads) + if self.screening_threads != self.config.screening_threads: + logging.warning( + f"Setting screening threads to {self.config.max_molecules}, because there won't be more work" + ) def name(self) -> str: return "ligen-vscreen" @@ -56,8 +70,8 @@ def name(self) -> str: def parameters(self) -> Dict[str, Any]: return { "env": "hq", - "smi": self.smi_path.name, - "molecules-per-task": self.max_molecules, + "smi": self.config.smi_path.name, + "molecules-per-task": self.config.max_molecules, "screening-threads": self.screening_threads, } @@ -85,10 +99,10 @@ def submit_ligen_benchmark(self, env: HqEnvironment, client: Client) -> Submitte ctx = LigenTaskContext(workdir=workdir, container_path=Path(CONTAINER_PATH).absolute()) expansion_configs = create_configs_from_smi( - input_smi=self.smi_path, + input_smi=self.config.smi_path, workdir_inputs=inputs, workdir_outputs=outputs, - max_molecules=self.max_molecules, + max_molecules=self.config.max_molecules, ) job = Job(workdir, default_env=dict(HQ_PYLOG="DEBUG")) @@ -142,10 +156,10 @@ def submit_ligen_benchmark(self, env: DaskEnvironment, client: distributed.Clien ctx = LigenTaskContext(workdir=workdir, container_path=Path(CONTAINER_PATH).absolute()) expansion_configs = create_configs_from_smi( - input_smi=self.smi_path, + input_smi=self.config.smi_path, workdir_inputs=inputs, workdir_outputs=outputs, - max_molecules=self.max_molecules, + max_molecules=self.config.max_molecules, ) # TODO: use client.map @@ -207,7 +221,13 @@ def gen_descriptions(env: EnvironmentDescriptor, workload_cls) -> List[Benchmark for max_molecules, threads in variants: if max_molecules == 1 and threads > 1: continue - workload = workload_cls(smi_path=input_smi, max_molecules=max_molecules, screening_threads=threads) + config = LigenConfig( + container_path=CONTAINER_PATH, + smi_path=input_smi, + max_molecules=max_molecules, + screening_threads=threads, + ) + workload = workload_cls(config=config) yield BenchmarkDescriptor(env_descriptor=env, workload=workload, timeout=timeout) yield from gen_descriptions(hq_env, LigenHQWorkload) @@ -283,19 +303,25 @@ def benchmark_aggregated_vs_separate_tasks(): This benchmark tests the performance of Ligen + HQ when we use a single task per input ligand, vs. when we use 4/8/16 ligands for each task. """ - hq_path = get_hq_binary() + hq_path = get_hq_binary(profile=Profile.Dist) env = single_node_hq_cluster(hq_path, worker_threads=min(multiprocessing.cpu_count(), 64), version="base") input_smi = get_dataset_path(Path("ligen/artif-2.smi")) variants = [(1, 1)] # , (4, 4), (8, 8)] descriptions = [] for max_molecules, num_threads in variants: - workload = LigenHQWorkload(smi_path=input_smi, max_molecules=max_molecules, screening_threads=num_threads) + config = LigenConfig( + container_path=CONTAINER_PATH, + smi_path=input_smi, + max_molecules=max_molecules, + screening_threads=num_threads, + ) + workload = LigenHQWorkload(config) descriptions.append( BenchmarkDescriptor(env_descriptor=env, workload=workload, timeout=datetime.timedelta(minutes=10)) ) run_benchmarks_with_postprocessing(BENCH_WORKDIR, descriptions) - df = analyze_results_utilization(BENCH_WORKDIR / DEFAULT_DATA_JSON) + df = analyze_results_utilization(Database.from_file(BENCH_WORKDIR / DEFAULT_DATA_JSON)) output_dir = get_output_path(Path("aggregated_vs_separate_tasks")) df.to_csv(output_dir / "results.csv", index=False) @@ -307,5 +333,5 @@ def benchmark_aggregated_vs_separate_tasks(): format="%(levelname)s:%(asctime)s.%(msecs)03d:%(funcName)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) - # benchmark_aggregated_vs_separate_tasks() - cli() + benchmark_aggregated_vs_separate_tasks() + # cli() diff --git a/benchmarks/src/build/hq.py b/benchmarks/src/build/hq.py index dcaebce98..048af3aba 100644 --- a/benchmarks/src/build/hq.py +++ b/benchmarks/src/build/hq.py @@ -1,3 +1,4 @@ +import enum import logging import os import shutil @@ -12,10 +13,46 @@ from .. import ROOT_DIR +class Profile(enum.Enum): + Debug = 0 + Release = 1 + Dist = 2 + + def name(self) -> str: + if self == Profile.Debug: + return "dev" + elif self == Profile.Release: + return "release" + elif self == Profile.Dist: + return "dist" + else: + assert False + + def target_name(self) -> str: + if self == Profile.Debug: + return "debug" + elif self == Profile.Release: + return "release" + elif self == Profile.Dist: + return "dist" + else: + assert False + + def flags(self) -> List[str]: + if self == Profile.Debug: + return [] + elif self == Profile.Release: + return ["--release"] + elif self == Profile.Dist: + return ["--profile", "dist"] + else: + assert False + + @dataclasses.dataclass class BuildConfig: git_ref: str = TAG_WORKSPACE - release: bool = True + profile: Profile = Profile.Release zero_worker: bool = False debug_symbols: bool = False @@ -28,15 +65,11 @@ class BuiltBinary: def get_build_dir(options: BuildConfig) -> Path: path = ROOT_DIR / "target" - if options.release: - return path / "release" - return path / "debug" + return path / options.profile.target_name() def binary_name(options: BuildConfig, resolved_ref: str) -> str: - name = f"hq-{resolved_ref}" - if not options.release: - name += "-debug" + name = f"hq-{resolved_ref}-{options.profile.name()}" if options.zero_worker: name += "-zw" return name @@ -56,20 +89,20 @@ def build_tag(config: BuildConfig, resolved_ref: str) -> Path: return path build_description = ( - f"{tag} (release={config.release}, zero_worker={config.zero_worker}, debug_symbols={config.debug_symbols})" + f"{tag} (profile={config.profile.name()}, zero_worker={config.zero_worker}, debug_symbols={config.debug_symbols})" ) with checkout_tag(tag): logging.info(f"Building {build_description}") env = os.environ.copy() if config.debug_symbols: - profile_name = "RELEASE" if config.release else "DEV" + profile_name = config.profile.name().upper() env[f"CARGO_PROFILE_{profile_name}_DEBUG"] = "line-tables-only" args = ["cargo", "build"] - if config.release: + args += config.profile.flags() + if config.profile != Profile.Debug: env["RUSTFLAGS"] = "-C target-cpu=native" - args += ["--release"] if config.zero_worker: args += ["--features", "zero-worker"] diff --git a/crates/tako/src/launcher.rs b/crates/tako/src/launcher.rs index 81cc2154b..6d7392302 100644 --- a/crates/tako/src/launcher.rs +++ b/crates/tako/src/launcher.rs @@ -168,6 +168,7 @@ pub fn command_from_definitions(definition: &ProgramDefinition) -> crate::Result let mut command = Command::new(definition.args[0].to_os_str_lossy()); + #[cfg(target_os = "linux")] unsafe { command.pre_exec(|| { // We need to create a new process group for the task, so that we can