Clean all user_dict functions.

mila-iqia · Jul 12, 2024 · 7ab61e7 · 7ab61e7
1 parent 657437c
commit 7ab61e7
Show file tree

Hide file tree

Showing 8 changed files with 1 addition and 480 deletions.
diff --git a/clockwork_tools/clockwork_tools/client.py b/clockwork_tools/clockwork_tools/client.py
@@ -216,45 +216,6 @@ def delete_user_props(
         params = {"job_id": job_id, "cluster_name": cluster_name, "keys": keys}
         return self._request(endpoint, params, method="PUT")
 
-    def jobs_user_dict_update(
-        self, job_id: str = None, cluster_name: str = None, update_pairs: dict = {}
-    ) -> dict[str, any]:
-        """REST call to api/v1/clusters/jobs/user_dict_update.
-
-        Updates the "user" component of a job that belongs to
-        the user calling this function. Allows for any number
-        of fields to be set.
-
-        If there is no ambiguity with the job_id,
-        the cluster_name argument is not required.
-
-        Fails when the job does not belong to the user,
-        or when the job is not yet in the database
-        (such as during delays between the creation of the job
-        and its presence in the database).
-
-        Args:
-            job_id (str): job_id to be described (in terms of Slurm terminology)
-            cluster_name (str): Name of cluster where that job is running.
-            update_pairs (dict): Updates to perform to the user dict.
-
-        Returns:
-            dict[any,any]: Returns the updated user dict.
-        """
-        endpoint = "api/v1/clusters/jobs/user_dict_update"
-        params = {}
-
-        for (k, a) in [
-            ("job_id", job_id),
-            ("cluster_name", cluster_name),
-        ]:
-            if a is not None:
-                params[k] = a
-        # Due to current constraints, we have to pass "update_pairs"
-        # as a string representing a structure in json.
-        params["update_pairs"] = json.dumps(update_pairs)
-        return self._request(endpoint, params, method="PUT", send_json=False)
-
     def nodes_list(self, cluster_name: str = None) -> list[dict[str, any]]:
         """REST call to api/v1/clusters/nodes/list.
 
@@ -435,48 +396,6 @@ def jobs_one(
         )
         return super().jobs_one(**params)
 
-    def jobs_user_dict_update(
-        self,
-        job_id: str = None,
-        cluster_name: str = None,
-        update_pairs: dict = {},
-        target_self: bool = True,
-    ) -> dict[str, any]:
-        """REST call to api/v1/clusters/jobs/user_dict_update.
-
-        Updates the "user" component of a job that belongs to
-        the user calling this function. Allows for any number
-        of fields to be set.
-
-        If there is no ambiguity with the job_id,
-        the cluster_name argument is not required.
-
-        Fails when the job does not belong to the user,
-        or when the job is not yet in the database
-        (such as during delays between the creation of the job
-        and its presence in the database).
-
-        Args:
-            job_id (str): job_id to be described (in terms of Slurm terminology)
-            cluster_name (str): Name of cluster where that job is running.
-            update_pairs (dict): Updates to perform to the user dict.
-            target_self (bool): Inside a Slurm job, automatically infer arguments
-                                to target this specific Slurm job.
-
-        Returns:
-            dict[any,any]: Returns the updated user dict.
-        """
-        params = self._create_params_for_request(
-            target_self=target_self, job_id=job_id, cluster_name=cluster_name
-        )
-        # Due to current constraints, we have to pass "update_pairs"
-        # as a string representing a structure in json.
-        # However, since the base class does the json.dump, we don't
-        # want to do it twice. We therefore have to show some restraint.
-        # params["update_pairs"] = json.dumps(update_pairs)  # NOT THAT
-        params["update_pairs"] = update_pairs
-        return super().jobs_user_dict_update(**params)
-
     def nodes_list(self, cluster_name: str = None) -> list[dict[str, any]]:
         """REST call to api/v1/clusters/nodes/list.
 

diff --git a/clockwork_tools_test/conftest.py b/clockwork_tools_test/conftest.py
@@ -89,38 +89,3 @@ def unauthorized_mtclient_00(invalid_config_00, db_with_fake_data):
 @pytest.fixture
 def unauthorized_mtclient_01(invalid_config_01, db_with_fake_data):
     return clockwork_tools.client.ClockworkToolsClient(**invalid_config_01)
-
-
-"""
-There is a test called `test_jobs_user_dict_update` in "test_mt_jobs.py"
-and it requires a particular set of text fixtures compared to the other
-tests from the same file. Its fixture `mtclient_student01` comes from "conftest.py"
-in the same directory, which itself refers to a particular job that was
-hardcoded to be inserted in the database (opposed to being inserted by "fake_data.json").
-
-This is because we require additional permissions in order to modify the "user"
-portion of a job in the database, which means that the usual test fixture
-`mtclient(config, db_with_fake_data)` from "conftest.py" doesn't satisfy the requirements.
-
-There is no guarantee that this `mtclient` fixture would indeed have any of
-its own jobs present in the "fake_data.json" used to populate the database initially.
-That explains the acrobatics done in the file "insert_hardcoded_values.py".
-"""
-
-
-@pytest.fixture
-def mtclient_student01(db_with_fake_data):
-    """
-    This is a little too "hardcoded" in terms of featuring
-    data straight from fake_data.json added manually here,
-    but we need something like that to test `jobs_user_dict_update`.
-    See more detailed explanation above in the source file.
-    """
-    return clockwork_tools.client.ClockworkToolsClient(
-        **{
-            "host": os.environ["clockwork_tools_test_HOST"],
-            "port": os.environ["clockwork_tools_test_PORT"],
-            "email": "[email protected]",
-            "clockwork_api_key": "000aaa01",
-        }
-    )
diff --git a/clockwork_tools_test/test_mt_jobs.py b/clockwork_tools_test/test_mt_jobs.py
@@ -57,39 +57,3 @@ def test_list_jobs_for_a_given_random_user(mtclient, fake_data):
     validator, username = helper_list_jobs_for_a_given_random_user(fake_data)
     LD_jobs = mtclient.jobs_list(username=username)
     validator(LD_jobs)
-
-
-def test_jobs_user_dict_update(mtclient_student01):
-    """
-    Verify that we can modify that particular job for that user.
-    We omit the `fake_data` test fixture because we hardcode
-    certain details by hand.
-    Refer to the code for test fixture `mtclient_student01`
-    to see a longer discussion about the particular needs of this test.
-    """
-
-    # set two fields and make sure they are read back properly
-    expected = {"pet_name": "fido", "nbr_cats": 10}
-    #
-    updated_user_dict = mtclient_student01.jobs_user_dict_update(
-        job_id="284357",
-        cluster_name="graham",
-        update_pairs={"pet_name": "fido", "nbr_cats": 10},
-    )
-    assert updated_user_dict == expected
-    #
-    D_job = mtclient_student01.jobs_one(job_id="284357", cluster_name="graham")
-    assert D_job["user"] == expected
-
-    # update one field and add a new one, then make sure all three are as expected
-    expected = {"pet_name": "garfield", "nbr_cats": 10, "nbr_dinosaurs": 42}
-    #
-    updated_user_dict = mtclient_student01.jobs_user_dict_update(
-        job_id="284357",
-        cluster_name="graham",
-        update_pairs={"pet_name": "garfield", "nbr_dinosaurs": 42},
-    )
-    assert updated_user_dict == expected
-    #
-    D_job = mtclient_student01.jobs_one(job_id="284357", cluster_name="graham")
-    assert D_job["user"] == expected
diff --git a/clockwork_web/core/jobs_helper.py b/clockwork_web/core/jobs_helper.py
@@ -339,20 +339,6 @@ def get_jobs(
     )
 
 
-def update_job_user_dict(mongodb_filter: dict, new_user_dict: dict):
-    """
-    This is a step that happens after every checks have been made.
-    It's the "now we actually do it" part of the sequence of operations.
-
-    `mongodb_filter` is to identify a job uniquely
-    `new_user_dict` is the value to replace the "user" field with
-    """
-    mc = get_db()[current_app.config["MONGODB_DATABASE_NAME"]]
-    return mc["jobs"].update_one(
-        mongodb_filter, {"$set": {"user": new_user_dict}}, upsert=False
-    )
-
-
 def strip_artificial_fields_from_job(D_job):
     # Returns a copy. Does not mutate the original.
     fields_to_remove = ["_id"]

diff --git a/clockwork_web/rest_routes/jobs.py b/clockwork_web/rest_routes/jobs.py
@@ -260,162 +260,3 @@ def route_user_props_delete():
     # Delete props, using current_user_id as mila email username.
     delete_user_props(job_id, cluster_name, keys, current_user_id)
     return jsonify("")
-
-
-# Note that this whole `user_dict_update` thing needs to be rewritten
-# in order to use Olivier's proposal about jobs properties
-# being visible only to the users that set them,
-# and where everyone can set properties on all jobs.
-
-
-@flask_api.route("/jobs/user_dict_update", methods=["PUT"])
-@authentication_required
-def route_api_v1_jobs_user_dict_update():
-    """
-        Performs an update to the user dict for a given job.
-        The request needs to contain "job_id" and "cluster_name"
-        to identify the job to be updated, and then it needs to contain
-        "update_pairs" which is a list of (k, v), as a json string,
-        that we should update in the `job["user"]` dict.
-        The reason for passing "update_pairs" as a string is that
-        Flask received it as `werkzeug.datastructures.FileStorage`
-        when it was sent as a dict instead of a string.
-
-        User info is in `g.current_user_with_rest_auth`.
-
-        Returns the updated user dict.
-
-    .. :quickref: update the user dict for a given Slurm job that belongs to the user
-    """
-    # Retrieve the authentified user
-    current_user_id = g.current_user_with_rest_auth["mila_email_username"]
-    current_user = User.get(current_user_id)
-
-    logging.info(
-        f"clockwork REST route: /jobs/user_dict_update - current_user_with_rest_auth={current_user_id}"
-    )
-
-    # A 'PUT' method is generally to modify resources.
-    # Retrieve the provided job ID
-    job_id = request.values.get("job_id", None)
-    if job_id is None:
-        return jsonify("Missing argument job_id."), 400  # bad request
-
-    # Retrieve the provided cluster names
-    requested_cluster_names = get_custom_array_from_request_args(
-        request.args.get("cluster_name")
-    )
-    if len(requested_cluster_names) < 1:
-        # If no cluster has been requested, then all clusters have been requested
-        # (a filter related to which clusters are available to the current user
-        #  is then applied)
-        requested_cluster_names = get_all_clusters()
-
-    # Limit the cluster options to the clusters the user can access
-    user_clusters = (
-        current_user.get_available_clusters()
-    )  # Retrieve the clusters the user can access
-    cluster_names = [
-        cluster for cluster in requested_cluster_names if cluster in user_clusters
-    ]
-
-    # If cluster_names is empty, then the user does not have access to any cluster (s)he
-    # requested. Thus, an error is returned
-    if len(cluster_names) < 1:
-        return (
-            jsonify("It seems you have no access to the requested cluster(s)."),
-            403,
-        )
-
-    # Note that we'll have to add some more fields when we make progress in CW-93.
-    # We are going to have to check for "array_job_id" and "array_task_id"
-    # if those are supplied to identify jobs in situations where the "job_id"
-    # are "cluster_name" are insufficient.
-    (LD_jobs, _) = get_jobs(job_ids=[job_id], cluster_names=cluster_names)
-    # Note that `filter` gets reused later to commit again to the database.
-
-    if len(LD_jobs) == 0:
-        # Note that, if we wanted to have "phantom entries" in some other collection,
-        # before the the jobs are present in the database, then we would need to adjust
-        # this code branch to allow for updates even when no such jobs are currently found.
-        return jsonify("Job not found in database."), 404
-    if len(LD_jobs) > 1:
-        resp = (
-            jsonify(
-                f"Found more than one job matching the criteria. This is probably not what was intended."
-            ),
-            500,
-        )
-        return resp
-
-    D_job = LD_jobs[0]
-
-    assert (
-        hasattr(g, "current_user_with_rest_auth")
-        and g.current_user_with_rest_auth is not None
-    ), "Authentication should have failed much earlier than this."
-
-    # Make use of `current_user_with_rest_auth`.
-    # If the user requesting the update is not the owner,
-    # then we refuse the update and return an error
-    # that describes the problem.
-
-    for key in ["mila_email_username", "mila_cluster_username", "cc_account_username"]:
-        # Be as strict as possible here. If the job entry
-        # contains any of the three types of usernames (and it's not `None`),
-        # than it must be matched against that of the user
-        # authenticated and submitting the request to modify
-        # the user_dict. Usernames that are `None` but disagree
-        # with each other will not cause failed matches.
-        if D_job["cw"].get(key, None):
-            if D_job["cw"][key] != g.current_user_with_rest_auth[key]:
-                return (
-                    jsonify(
-                        f'This job belongs to {D_job["cw"][key]} and not {g.current_user_with_rest_auth[key]}.'
-                    ),
-                    403,  ## response code for "authorization denied"
-                )
-
-    update_pairs = request.values.get("update_pairs", None)
-    if isinstance(update_pairs, dict):
-        # This won't happen, but it would be nice if that were possible
-        # instead of getting a `FileStorage` in Flask.
-        pass
-    elif update_pairs is None:
-        return jsonify(f"Missing 'update_pairs' from arguments."), 500
-    elif isinstance(update_pairs, str):
-        try:
-            update_pairs = json.loads(update_pairs)
-        except Exception as inst:
-            return jsonify(f"Failed to json.loads(update_pairs). \n{inst}."), 500
-    else:
-        return (
-            jsonify(
-                f"Field 'update_pairs' was not a string (encoding a json structure). {update_pairs}"
-            ),
-            500,
-        )
-
-    new_user_dict = D_job["user"] | update_pairs
-    # We do an update with the database if we have an empty list
-    # for `update_pairs`, for the sake of more predictable behavior.
-
-    # We could reuse `filter` because we might as well refer to the job
-    # by its "_id" instead since we have it. Maybe this can mitigate the
-    # unlikely risk of `filter`` returning more matches than a moment ago
-    # because updates were made in-between.
-    # In any case, with the current setup we are still exposed to the
-    # possibility that rapid updates to the same job could compete with
-    # each other (and that's kinda fine).
-    mc = get_db()
-    result = mc["jobs"].update_one(
-        {"_id": D_job["_id"]}, {"$set": {"user": new_user_dict}}, upsert=False
-    )
-
-    # See "https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html"
-    # for the properties of the returned object.
-    if result.modified_count == 1:
-        return jsonify(new_user_dict), 200
-    else:
-        # Will that return a useful string as an error?
-        return jsonify(f"Problem during update of the user dict. {result}"), 500