Skip to content

Commit

Permalink
UPDATE: ruth-distributed parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
frackledust authored and paulo308 committed Apr 1, 2024
1 parent d6d09e8 commit c13756a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
5 changes: 4 additions & 1 deletion ruth/zeromq/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ distributed spawning of workers across nodes.
In order to run the simulation in a distributed fashion we can use ```bench.py```, specifically the function ```run```.
For correct incorporation of the spawned nodes and configuration, this function has to be used inside ```ruth```; otherwise
we may simply use ```bench.py``` and edit its parameters, since it spawns ```run``` with the following parameters:

```
WORK_DIR = Path(os.getcwd()).absolute()
WORKER_DIR = WORK_DIR / str(sys.argv[1])
Expand All @@ -61,6 +60,10 @@ try_to_kill = False
run(...)
```
Or run the command:
```
ruth-distributed EXPERIMENT_NAME EVKIT_DIR_PATH --config-file="config.json" --workers=32
```

## Submitting a job
In order to submit a job to a cluster, for example SLURM, we may use:
Expand Down
5 changes: 0 additions & 5 deletions ruth/zeromq/bench.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import itertools
import logging
import pandas as pd
import time
Expand All @@ -7,16 +6,12 @@
import subprocess
import os
import sys
import signal

from typing import List
from pathlib import Path
from collections import defaultdict
from contextlib import closing
from dataclasses import dataclass
from cluster.cluster import Cluster, start_process
from cluster import cluster
from src.client import Client


def get_pbs_nodes() -> List[str]:
Expand Down
12 changes: 7 additions & 5 deletions ruth/zeromq/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@


@click.command()
@click.argument("worker-dir", type=str)
@click.argument("evkit-path", type=click.Path(exists=True))
@click.argument("experiment-name", type=str)
@click.argument("evkit-dir-path", type=click.Path(exists=True))
@click.option("--config-file", type=click.Path(exists=True), help="Path to simulation config.", default="config.json")
@click.option("--workers", type=int, default=32, help="Number of workers. Default 32.")
@click.option("--spawn-workers-at-main-node", is_flag=True, help="Spawn workers at main node.")
@click.option("--try-to-kill", is_flag=True, help="Try to kill workers after simulation is computed.")
def distributed(worker_dir, evkit_path, config_file, workers, try_to_kill):
def distributed(experiment_name, evkit_path, config_file, workers, spawn_workers_at_main_node, try_to_kill):
work_dir = Path(os.getcwd()).absolute()
worker_dir = work_dir / worker_dir
worker_dir = work_dir / experiment_name
env_path = os.environ["VIRTUAL_ENV"]
modules = [
"Python/3.10.8-GCCcore-12.2.0",
Expand All @@ -39,7 +40,8 @@ def distributed(worker_dir, evkit_path, config_file, workers, try_to_kill):
EVKIT_PATH=evkit_path,
MODULES=modules,
ENV_PATH=env_path,
try_to_kill=try_to_kill
try_to_kill=try_to_kill,
spawn_workers_at_main_node=spawn_workers_at_main_node
)
# result = bench(nodes, WORKER_DIR)

Expand Down

0 comments on commit c13756a

Please sign in to comment.