Skip to content

Commit

Permalink
additional fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
cg505 committed Dec 20, 2024
1 parent 4c54642 commit aeaaf7b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion sky/jobs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

JOBS_CONTROLLER_TEMPLATE = 'jobs-controller.yaml.j2'
JOBS_CONTROLLER_YAML_PREFIX = '~/.sky/jobs_controller'
JOBS_CONTROLLER_LOGS_DIR = '~/sky_controller_logs'
JOBS_CONTROLLER_LOGS_DIR = '~/sky_logs/jobs_controller'

JOBS_TASK_YAML_PREFIX = '~/.sky/managed_jobs'

Expand Down
8 changes: 3 additions & 5 deletions sky/jobs/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@


def maybe_start_waiting_jobs() -> None:
"""Determine if any managed jobs can be launched, and if so, launch them.
"""Determine if any managed jobs can be scheduled, and if so, schedule them.
For newly submitted jobs, this includes starting the job controller
process. For jobs that are already alive but are waiting to launch a new
Expand All @@ -76,7 +76,6 @@ def maybe_start_waiting_jobs() -> None:
the jobs controller. New job controller processes will be detached from the
current process and there will not be a parent/child relationship - see
launch_new_process_tree for more.
"""
try:
# We must use a global lock rather than a per-job lock to ensure correct
Expand Down Expand Up @@ -112,7 +111,7 @@ def maybe_start_waiting_jobs() -> None:
# Can't schedule anything, break from scheduling loop.
break

logger.info(f'Scheduling job {maybe_next_job["job_id"]}')
logger.debug(f'Scheduling job {maybe_next_job["job_id"]}')
state.scheduler_set_launching(maybe_next_job['job_id'],
current_state)

Expand All @@ -136,7 +135,7 @@ def maybe_start_waiting_jobs() -> None:
run_cmd, log_output=log_path)
state.set_job_controller_pid(job_id, pid)

logger.info(f'Job {job_id} started with pid {pid}')
logger.debug(f'Job {job_id} started with pid {pid}')

except filelock.Timeout:
# If we can't get the lock, just exit. The process holding the lock
Expand Down Expand Up @@ -203,7 +202,6 @@ def job_done(job_id: int, idempotent: bool = False) -> None:
The job could be in any terminal ManagedJobStatus. However, once DONE, it
should never transition back to another state.
"""
if idempotent and (state.get_job_schedule_state(job_id)
== state.ManagedJobScheduleState.DONE):
Expand Down
8 changes: 5 additions & 3 deletions sky/jobs/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,9 +827,11 @@ def scheduler_set_done(job_id: int, idempotent: bool = False) -> None:

def set_job_controller_pid(job_id: int, pid: int) -> None:
    """Record the controller process id for a managed job.

    Sets the ``pid`` column of the ``job_info`` row whose ``spot_job_id``
    equals ``job_id``.

    Args:
        job_id: Managed job id, matched against ``job_info.spot_job_id``.
        pid: Process id to store for that job.

    Raises:
        AssertionError: If the UPDATE did not affect exactly one row.
    """
    with db_utils.safe_cursor(_DB_PATH) as cursor:
        # Values are passed as bound parameters rather than interpolated into
        # the SQL text. Adjacent string literals are concatenated verbatim, so
        # each fragment must end with a space; without the one after SET the
        # statement would read 'SETpid' and fail to parse.
        updated_count = cursor.execute(
            'UPDATE job_info SET '
            'pid = (?) '
            'WHERE spot_job_id = (?)', (pid, job_id)).rowcount
        # Exactly one row per job is expected; anything else means the job
        # row is missing or duplicated.
        assert updated_count == 1, (job_id, updated_count)


def get_job_schedule_state(job_id: int) -> ManagedJobScheduleState:
Expand Down

0 comments on commit aeaaf7b

Please sign in to comment.