Merge branch 'master' of github.com:mila-iqia/clockwork into update-user-page
mila-iqia · Aug 16, 2023 · 42be4fb · 42be4fb
2 parents 793e484 + 46a04fc
commit 42be4fb
Show file tree

Hide file tree

Showing 51 changed files with 8,127 additions and 6,324 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,12 @@
+BSD License
+
+Copyright (c) 2023, Mila
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of Mila nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY MILA "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MILA BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/clockwork_tools/clockwork_tools/client.py b/clockwork_tools/clockwork_tools/client.py
@@ -89,7 +89,7 @@ def _request(self, endpoint, params, method="GET"):
             response = requests.put(
                 complete_address, data=params, headers=self._get_headers()
             )
-        print(response)
+
         # Check code instead and raise exception if it's the wrong one.
         if response.status_code == 200:
             return response.json()
@@ -279,24 +279,25 @@ def __init__(
         # Otherwise, try to read them from the environment. Nothing in Clockwork
         # can work without some form of authentication, so we insist on finding
         # those values somewhere.
-        if clockwork_api_key:
-            self.clockwork_api_key = clockwork_api_key
-        elif "CLOCKWORK_API_KEY" in os.environ and os.environ["CLOCKWORK_API_KEY"]:
-            self.clockwork_api_key = os.environ["CLOCKWORK_API_KEY"]
-        else:
-            raise Exception(
-                f"Invalid clockwork_api_key argument or missing from environment."
-            )
-
-        if email:
-            self.email = email
-        elif "CLOCKWORK_EMAIL" in os.environ and os.environ["CLOCKWORK_EMAIL"]:
-            self.email = os.environ["CLOCKWORK_EMAIL"]
-        else:
-            raise Exception(f"Invalid email argument or missing from environment.")
+        if not clockwork_api_key:
+            if "CLOCKWORK_API_KEY" in os.environ and os.environ["CLOCKWORK_API_KEY"]:
+                clockwork_api_key = os.environ["CLOCKWORK_API_KEY"]
+            else:
+                raise Exception(
+                    f"Invalid clockwork_api_key argument or missing from environment."
+                )
+
+        if not email:
+            if "CLOCKWORK_EMAIL" in os.environ and os.environ["CLOCKWORK_EMAIL"]:
+                email = os.environ["CLOCKWORK_EMAIL"]
+            else:
+                raise Exception(f"Invalid email argument or missing from environment.")
 
         super().__init__(
-            email=email, clockwork_api_key=clockwork_api_key, host=host, port=port
+            email=email,
+            clockwork_api_key=clockwork_api_key,
+            host=host,
+            port=port,
         )
 
         # Additional feature on top of the parent class.

diff --git a/clockwork_web/browser_routes/clusters.py b/clockwork_web/browser_routes/clusters.py
@@ -8,6 +8,7 @@
 from flask_babel import gettext
 
 from clockwork_web.core.clusters_helper import get_all_clusters
+from clockwork_web.core.jobs_helper import get_jobs
 from clockwork_web.core.users_helper import render_template_with_user_settings
 
 flask_api = Blueprint("clusters", __name__)
@@ -79,6 +80,24 @@ def route_one():
             )
 
         else:
+            # Add supplementary information to the cluster to be displayed.
+            # We add it here instead of above because we don't want to spend time
+            # generating those info for all clusters, as we just want to display one.
+
+            # get job slurm updates.
+            jobs, _ = get_jobs(cluster_names=[cluster_name])
+            job_dates = [
+                job["cw"]["last_slurm_update"]
+                for job in jobs
+                if "last_slurm_update" in job["cw"]
+            ]
+            # Save min and max dates for jobs.
+            if job_dates:
+                D_clusters[cluster_name]["job_dates"] = {
+                    "min": min(job_dates),
+                    "max": max(job_dates),
+                }
+
             # Return a HTML page presenting the requested cluster's information
             return render_template_with_user_settings(
                 "cluster.html",

diff --git a/clockwork_web/browser_routes/jobs.py b/clockwork_web/browser_routes/jobs.py
@@ -220,6 +220,7 @@ def route_one():
                 "error.html",
                 error_msg=gettext("Missing argument job_id."),
                 previous_request_args=previous_request_args,
+                error_code=400,
             ),
             400,
         )  # bad request
@@ -249,7 +250,7 @@ def route_one():
             "error.html",
             error_msg=gettext(
                 "Found %(len_LD_jobs) jobs with job_id %(job_id)."
-            ).format(len_LD_jobs=len(LD_jobs), job_id=job_id),
+            ).format(len_LD_jobs=len(LD_jobs), job_id=job_ids[0]),
             previous_request_args=previous_request_args,
         )  # Not sure what to do about these cases.
 

diff --git a/clockwork_web/browser_routes/nodes.py b/clockwork_web/browser_routes/nodes.py
@@ -247,6 +247,6 @@ def set_up_cluster_names_and_node_name_filters(cluster_names=[], node_name=None)
         # for the user
         cluster_names = user_clusters
 
-    f1 = {"slurm.cluster_name": {"$in": user_clusters}}
+    f1 = {"slurm.cluster_name": {"$in": cluster_names}}
 
     return [f0, f1]
diff --git a/clockwork_web/browser_routes/status.py b/clockwork_web/browser_routes/status.py
@@ -0,0 +1,78 @@
+"""
+Browser routes dealing with the status of the server and its clusters
+"""
+import logging
+
+from flask import Blueprint, request
+from flask_login import current_user, login_required
+from flask_babel import gettext
+
+from clockwork_web.core.clusters_helper import get_all_clusters
+from clockwork_web.core.jobs_helper import get_jobs
+from clockwork_web.core.users_helper import (
+    render_template_with_user_settings,
+    get_users,
+)
+
+flask_api = Blueprint("status", __name__)
+
+
+@flask_api.route("/")
+@login_required
+def route_status():
+    """
+    Render the status page, with statistics about users and clusters.
+
+    Returns:
+        The "status.html" template rendered with a `server_status` dictionary.
+    """
+    logging.info(
+        f"clockwork_web route: /clusters/status  - current_user={current_user.mila_email_username}"
+    )
+
+    all_users = get_users()
+
+    # Users statistics: enabled users are those whose "status" is "enabled",
+    # and DRAC users are those with a non-empty "cc_account_username".
+    enabled_users = [entry for entry in all_users if entry["status"] == "enabled"]
+    drac_users = [
+        entry for entry in all_users if entry.get("cc_account_username", None)
+    ]
+
+    # Clusters statistics: for each cluster, the number of jobs and, when
+    # available, the oldest and latest "last_slurm_update" of these jobs.
+    all_clusters = get_all_clusters()
+    clusters_status = {}
+    for name in all_clusters:
+        cluster_jobs, _ = get_jobs(cluster_names=[name])
+        update_dates = []
+        for job in cluster_jobs:
+            if "last_slurm_update" in job["cw"]:
+                update_dates.append(job["cw"]["last_slurm_update"])
+
+        cluster_status = {
+            "display_order": all_clusters[name]["display_order"],
+            "nb_jobs": len(cluster_jobs),
+        }
+        if update_dates:
+            oldest_update, latest_update = min(update_dates), max(update_dates)
+            cluster_status["job_dates"] = {
+                "min": oldest_update,
+                "max": latest_update,
+            }
+        clusters_status[name] = cluster_status
+
+    # The "clusters" entry is None when no cluster is available.
+    server_status = {
+        "nb_users": len(all_users),
+        "nb_enabled_users": len(enabled_users),
+        "nb_drac_users": len(drac_users),
+        "clusters": clusters_status or None,
+    }
+
+    return render_template_with_user_settings(
+        "status.html",
+        server_status=server_status,
+        mila_email_username=current_user.mila_email_username,
+        previous_request_args={},
+    )
diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py
@@ -78,7 +78,7 @@ def get_filtered_and_paginated_jobs(
     nbr_items_to_display=None,
     want_count=False,
     sort_by="submit_time",
-    sort_asc=1,
+    sort_asc=-1,
 ):
     """
     Talk to the database and get the information.
@@ -221,7 +221,7 @@ def get_jobs(
     nbr_items_to_display=None,
     want_count=False,
     sort_by="submit_time",
-    sort_asc=1,
+    sort_asc=-1,
 ):
     """
     Set up the filters according to the parameters and retrieve the requested jobs from the database.

diff --git a/clockwork_web/core/search_helper.py b/clockwork_web/core/search_helper.py
@@ -45,15 +45,28 @@ def parse_search_request(user, args, force_pagination=True):
     job_states = get_inferred_job_states(aggregated_job_states)
     job_states += get_custom_array_from_request_args(args.get("job_state"))
 
+    job_ids = get_custom_array_from_request_args(args.get("job_id"))
+    # Set default value of sort_asc
+    sort_by = args.get("sort_by", default="submit_time", type=str)
+    sort_asc = args.get("sort_asc", default=0, type=int)
+    if sort_asc not in (-1, 1):
+        if sort_by in ["cluster_name", "user", "name", "job_state"]:
+            # Default value of sort_asc is ascending in these cases
+            sort_asc = 1
+        else:
+            # Default value of sort_asc is descending otherwise
+            sort_asc = -1
+
     query = SimpleNamespace(
         username=args.get("username"),
         cluster_name=cluster_names,
         aggregated_job_state=aggregated_job_states,
         job_state=job_states,
+        job_ids=job_ids,
         pagination_page_num=args.get("page_num", type=int, default=default_page_number),
         pagination_nbr_items_per_page=args.get("nbr_items_per_page", type=int),
-        sort_by=args.get("sort_by", default="submit_time", type=str),
-        sort_asc=args.get("sort_asc", default=1, type=int),
+        sort_by=sort_by,
+        sort_asc=sort_asc,
         want_count=want_count,
     )
 
@@ -91,6 +104,7 @@ def search_request(user, args, force_pagination=True):
         username=query.username,
         cluster_names=query.cluster_name,
         job_states=query.job_state,
+        job_ids=query.job_ids,
         nbr_skipped_items=query.nbr_skipped_items,
         nbr_items_to_display=query.nbr_items_to_display,
         want_count=force_pagination

diff --git a/clockwork_web/core/users_helper.py b/clockwork_web/core/users_helper.py
@@ -2,6 +2,7 @@
 Helper functions related to the User entity and the users entries from the database.
 """
 
+from datetime import datetime, timedelta
 from flask_login import current_user
 from flask import render_template
 import json
@@ -17,7 +18,7 @@
     string as valid_string,
 )
 from clockwork_web.core.clusters_helper import get_all_clusters, get_account_fields
-from clockwork_web.core.jobs_helper import get_jobs_properties_list_per_page
+from clockwork_web.core.jobs_helper import get_jobs_properties_list_per_page, get_jobs
 
 from clockwork_web.core.utils import (
     get_available_date_formats,
@@ -300,6 +301,19 @@ def get_users_one(mila_email_username):
     return user
 
 
+def get_users():
+    """
+    Fetch every user entry stored in the database.
+
+    Returns:
+        A list of dictionaries, one per entry found in the
+        "users" collection.
+    """
+    # The empty filter is meant to select all the entries of the collection
+    all_users_cursor = get_db()["users"].find({})
+    return list(all_users_cursor)
+
+
 def get_available_clusters_from_user_dict(D_user):
     """
     Retrieve the clusters a user can access.
@@ -567,5 +581,43 @@ def render_template_with_user_settings(template_name_or_list, **context):
 
     # Send the clusters infos to the template
     context["clusters"] = get_all_clusters()
+    # List clusters available for connected user,
+    # or set an empty list for anon user.
+    context["user_clusters"] = (
+        []
+        if current_user.mila_email_username == "[email protected]"
+        else current_user.get_available_clusters()
+    )
+
+    # Get cluster status (if jobs are old and cluster has error).
+    for cluster_name in context["clusters"]:
+        # Default status values.
+        jobs_are_old = False
+        cluster_has_error = False
+
+        # Check if jobs are old.
+        jobs, _ = get_jobs(cluster_names=[cluster_name])
+        job_dates = [
+            job["cw"]["last_slurm_update"]
+            for job in jobs
+            if "last_slurm_update" in job["cw"]
+        ]
+        if job_dates:
+            most_recent_job_edition = max(job_dates)
+            current_timestamp = datetime.now().timestamp()
+            elapsed_time = timedelta(
+                seconds=current_timestamp - most_recent_job_edition
+            )
+            # Let's say the latest jobs edition must not be older than 30 days ago.
+            max_delay = timedelta(days=30)
+            jobs_are_old = elapsed_time > max_delay
+
+        # Cluster error cannot yet be checked, so
+        # cluster_has_error is always False for now.
+
+        context["clusters"][cluster_name]["status"] = {
+            "jobs_are_old": jobs_are_old,
+            "cluster_has_error": cluster_has_error,
+        }
 
     return render_template(template_name_or_list, **context)