Skip to content

Commit

Permalink
Merge branch 'master' of github.com:mila-iqia/clockwork into update-u…
Browse files Browse the repository at this point in the history
…ser-page
  • Loading branch information
soline-b committed Aug 16, 2023
2 parents 793e484 + 46a04fc commit 42be4fb
Show file tree
Hide file tree
Showing 51 changed files with 8,127 additions and 6,324 deletions.
12 changes: 12 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
BSD License

Copyright (c) 2023, Mila
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of Mila nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY MILA "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MILA BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 changes: 18 additions & 17 deletions clockwork_tools/clockwork_tools/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _request(self, endpoint, params, method="GET"):
response = requests.put(
complete_address, data=params, headers=self._get_headers()
)
print(response)

# Check code instead and raise exception if it's the wrong one.
if response.status_code == 200:
return response.json()
Expand Down Expand Up @@ -279,24 +279,25 @@ def __init__(
# Otherwise, try to read them from the environment. Nothing in Clockwork
# can work without some form of authentication, so we insist on finding
# those values somewhere.
if clockwork_api_key:
self.clockwork_api_key = clockwork_api_key
elif "CLOCKWORK_API_KEY" in os.environ and os.environ["CLOCKWORK_API_KEY"]:
self.clockwork_api_key = os.environ["CLOCKWORK_API_KEY"]
else:
raise Exception(
f"Invalid clockwork_api_key argument or missing from environment."
)

if email:
self.email = email
elif "CLOCKWORK_EMAIL" in os.environ and os.environ["CLOCKWORK_EMAIL"]:
self.email = os.environ["CLOCKWORK_EMAIL"]
else:
raise Exception(f"Invalid email argument or missing from environment.")
if not clockwork_api_key:
if "CLOCKWORK_API_KEY" in os.environ and os.environ["CLOCKWORK_API_KEY"]:
clockwork_api_key = os.environ["CLOCKWORK_API_KEY"]
else:
raise Exception(
f"Invalid clockwork_api_key argument or missing from environment."
)

if not email:
if "CLOCKWORK_EMAIL" in os.environ and os.environ["CLOCKWORK_EMAIL"]:
email = os.environ["CLOCKWORK_EMAIL"]
else:
raise Exception(f"Invalid email argument or missing from environment.")

super().__init__(
email=email, clockwork_api_key=clockwork_api_key, host=host, port=port
email=email,
clockwork_api_key=clockwork_api_key,
host=host,
port=port,
)

# Additional feature on top of the parent class.
Expand Down
19 changes: 19 additions & 0 deletions clockwork_web/browser_routes/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from flask_babel import gettext

from clockwork_web.core.clusters_helper import get_all_clusters
from clockwork_web.core.jobs_helper import get_jobs
from clockwork_web.core.users_helper import render_template_with_user_settings

flask_api = Blueprint("clusters", __name__)
Expand Down Expand Up @@ -79,6 +80,24 @@ def route_one():
)

else:
# Add supplementary information to the cluster to be displayed.
# We add it here instead of above because we don't want to spend time
# generating those info for all clusters, as we just want to display one.

# get job slurm updates.
jobs, _ = get_jobs(cluster_names=[cluster_name])
job_dates = [
job["cw"]["last_slurm_update"]
for job in jobs
if "last_slurm_update" in job["cw"]
]
# Save min and max dates for jobs.
if job_dates:
D_clusters[cluster_name]["job_dates"] = {
"min": min(job_dates),
"max": max(job_dates),
}

# Return a HTML page presenting the requested cluster's information
return render_template_with_user_settings(
"cluster.html",
Expand Down
3 changes: 2 additions & 1 deletion clockwork_web/browser_routes/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ def route_one():
"error.html",
error_msg=gettext("Missing argument job_id."),
previous_request_args=previous_request_args,
error_code=400,
),
400,
) # bad request
Expand Down Expand Up @@ -249,7 +250,7 @@ def route_one():
"error.html",
error_msg=gettext(
"Found %(len_LD_jobs) jobs with job_id %(job_id)."
).format(len_LD_jobs=len(LD_jobs), job_id=job_id),
).format(len_LD_jobs=len(LD_jobs), job_id=job_ids[0]),
previous_request_args=previous_request_args,
) # Not sure what to do about these cases.

Expand Down
2 changes: 1 addition & 1 deletion clockwork_web/browser_routes/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,6 @@ def set_up_cluster_names_and_node_name_filters(cluster_names=[], node_name=None)
# for the user
cluster_names = user_clusters

f1 = {"slurm.cluster_name": {"$in": user_clusters}}
f1 = {"slurm.cluster_name": {"$in": cluster_names}}

return [f0, f1]
78 changes: 78 additions & 0 deletions clockwork_web/browser_routes/status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Browser routes dealing with the server "status" page
"""
import logging

from flask import Blueprint, request
from flask_login import current_user, login_required
from flask_babel import gettext

from clockwork_web.core.clusters_helper import get_all_clusters
from clockwork_web.core.jobs_helper import get_jobs
from clockwork_web.core.users_helper import (
render_template_with_user_settings,
get_users,
)

# Flask blueprint, registered under the name "status", to which the route(s)
# defined in this module are attached.
flask_api = Blueprint("status", __name__)


@flask_api.route("/")
@login_required
def route_status():
    """
    Render the status page: user counts and per-cluster job statistics.

    The template receives a `server_status` dictionary with:
    - "nb_users": total number of users returned by `get_users()`;
    - "nb_enabled_users": number of users whose "status" field is "enabled";
    - "nb_drac_users": number of users with a truthy "cc_account_username"
      (such users are considered to have a DRAC account);
    - "clusters": for each cluster returned by `get_all_clusters()`, its
      "display_order", its number of jobs ("nb_jobs") and, when at least one
      job carries a "last_slurm_update", the "min" and "max" of those values
      under "job_dates". The value is None when there is no cluster at all.

    Returns:
        The rendered "status.html" template.
    """
    logging.info(
        f"clockwork_web route: /clusters/status - current_user={current_user.mila_email_username}"
    )

    all_users = get_users()

    # Users explicitly flagged as enabled.
    enabled_users = [entry for entry in all_users if entry["status"] == "enabled"]

    # Users with a DRAC account, i.e. a non-empty "cc_account_username".
    drac_users = [
        entry for entry in all_users if entry.get("cc_account_username", None)
    ]

    # Build one summary per cluster: job count and, when available,
    # the oldest and latest Slurm update timestamps of its jobs.
    known_clusters = get_all_clusters()
    cluster_summaries = {}
    for name in known_clusters:
        cluster_jobs, _ = get_jobs(cluster_names=[name])

        update_times = []
        for job in cluster_jobs:
            if "last_slurm_update" in job["cw"]:
                update_times.append(job["cw"]["last_slurm_update"])

        summary = {
            "display_order": known_clusters[name]["display_order"],
            "nb_jobs": len(cluster_jobs),
        }
        if update_times:
            summary["job_dates"] = {
                "min": min(update_times),
                "max": max(update_times),
            }
        cluster_summaries[name] = summary

    return render_template_with_user_settings(
        "status.html",
        server_status={
            "nb_users": len(all_users),
            "nb_enabled_users": len(enabled_users),
            "nb_drac_users": len(drac_users),
            # An empty mapping is passed to the template as None.
            "clusters": cluster_summaries or None,
        },
        mila_email_username=current_user.mila_email_username,
        previous_request_args={},
    )
4 changes: 2 additions & 2 deletions clockwork_web/core/jobs_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def get_filtered_and_paginated_jobs(
nbr_items_to_display=None,
want_count=False,
sort_by="submit_time",
sort_asc=1,
sort_asc=-1,
):
"""
Talk to the database and get the information.
Expand Down Expand Up @@ -221,7 +221,7 @@ def get_jobs(
nbr_items_to_display=None,
want_count=False,
sort_by="submit_time",
sort_asc=1,
sort_asc=-1,
):
"""
Set up the filters according to the parameters and retrieve the requested jobs from the database.
Expand Down
18 changes: 16 additions & 2 deletions clockwork_web/core/search_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,28 @@ def parse_search_request(user, args, force_pagination=True):
job_states = get_inferred_job_states(aggregated_job_states)
job_states += get_custom_array_from_request_args(args.get("job_state"))

job_ids = get_custom_array_from_request_args(args.get("job_id"))
# Set default value of sort_asc
sort_by = args.get("sort_by", default="submit_time", type=str)
sort_asc = args.get("sort_asc", default=0, type=int)
if sort_asc not in (-1, 1):
if sort_by in ["cluster_name", "user", "name", "job_state"]:
# Default value of sort_asc is ascending in these cases
sort_asc = 1
else:
# Default value of sort_asc is descending otherwise
sort_asc = -1

query = SimpleNamespace(
username=args.get("username"),
cluster_name=cluster_names,
aggregated_job_state=aggregated_job_states,
job_state=job_states,
job_ids=job_ids,
pagination_page_num=args.get("page_num", type=int, default=default_page_number),
pagination_nbr_items_per_page=args.get("nbr_items_per_page", type=int),
sort_by=args.get("sort_by", default="submit_time", type=str),
sort_asc=args.get("sort_asc", default=1, type=int),
sort_by=sort_by,
sort_asc=sort_asc,
want_count=want_count,
)

Expand Down Expand Up @@ -91,6 +104,7 @@ def search_request(user, args, force_pagination=True):
username=query.username,
cluster_names=query.cluster_name,
job_states=query.job_state,
job_ids=query.job_ids,
nbr_skipped_items=query.nbr_skipped_items,
nbr_items_to_display=query.nbr_items_to_display,
want_count=force_pagination
Expand Down
54 changes: 53 additions & 1 deletion clockwork_web/core/users_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Helper functions related to the User entity and the users entries from the database.
"""

from datetime import datetime, timedelta
from flask_login import current_user
from flask import render_template
import json
Expand All @@ -17,7 +18,7 @@
string as valid_string,
)
from clockwork_web.core.clusters_helper import get_all_clusters, get_account_fields
from clockwork_web.core.jobs_helper import get_jobs_properties_list_per_page
from clockwork_web.core.jobs_helper import get_jobs_properties_list_per_page, get_jobs

from clockwork_web.core.utils import (
get_available_date_formats,
Expand Down Expand Up @@ -300,6 +301,19 @@ def get_users_one(mila_email_username):
return user


def get_users():
    """
    Fetch every entry of the "users" collection.

    Returns:
        A list containing all the documents of the "users" collection
        (queried with an empty filter).
    """
    # An empty filter matches every document; the cursor is materialized
    # into a list before being returned.
    return list(get_db()["users"].find({}))


def get_available_clusters_from_user_dict(D_user):
"""
Retrieve the clusters a user can access.
Expand Down Expand Up @@ -567,5 +581,43 @@ def render_template_with_user_settings(template_name_or_list, **context):

# Send the clusters infos to the template
context["clusters"] = get_all_clusters()
# List clusters available for connected user,
# or set an empty list for anon user.
context["user_clusters"] = (
[]
if current_user.mila_email_username == "[email protected]"
else current_user.get_available_clusters()
)

# Get cluster status (if jobs are old and cluster has error).
for cluster_name in context["clusters"]:
# Default status values.
jobs_are_old = False
cluster_has_error = False

# Check if jobs are old.
jobs, _ = get_jobs(cluster_names=[cluster_name])
job_dates = [
job["cw"]["last_slurm_update"]
for job in jobs
if "last_slurm_update" in job["cw"]
]
if job_dates:
most_recent_job_edition = max(job_dates)
current_timestamp = datetime.now().timestamp()
elapsed_time = timedelta(
seconds=current_timestamp - most_recent_job_edition
)
# Let's say the latest jobs edition must not be older than 30 days ago.
max_delay = timedelta(days=30)
jobs_are_old = elapsed_time > max_delay

# Cluster error cannot yet be checked, so
# cluster_has_error is always False for now.

context["clusters"][cluster_name]["status"] = {
"jobs_are_old": jobs_are_old,
"cluster_has_error": cluster_has_error,
}

return render_template(template_name_or_list, **context)
Loading

0 comments on commit 42be4fb

Please sign in to comment.