Skip to content

Commit

Permalink
Clean all user_dict functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
notoraptor committed Jul 12, 2024
1 parent 657437c commit 7ab61e7
Show file tree
Hide file tree
Showing 8 changed files with 1 addition and 480 deletions.
81 changes: 0 additions & 81 deletions clockwork_tools/clockwork_tools/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,45 +216,6 @@ def delete_user_props(
params = {"job_id": job_id, "cluster_name": cluster_name, "keys": keys}
return self._request(endpoint, params, method="PUT")

def jobs_user_dict_update(
self, job_id: str = None, cluster_name: str = None, update_pairs: dict = {}
) -> dict[str, any]:
"""REST call to api/v1/clusters/jobs/user_dict_update.
Updates the "user" component of a job that belongs to
the user calling this function. Allows for any number
of fields to be set.
If there is no ambiguity with the job_id,
the cluster_name argument is not required.
Fails when the job does not belong to the user,
or when the job is not yet in the database
(such as during delays between the creation of the job
and its presence in the database).
Args:
job_id (str): job_id to be described (in terms of Slurm terminology)
cluster_name (str): Name of cluster where that job is running.
update_pairs (dict): Updates to perform to the user dict.
Returns:
dict[any,any]: Returns the updated user dict.
"""
endpoint = "api/v1/clusters/jobs/user_dict_update"
params = {}

for (k, a) in [
("job_id", job_id),
("cluster_name", cluster_name),
]:
if a is not None:
params[k] = a
# Due to current constraints, we have to pass "update_pairs"
# as a string representing a structure in json.
params["update_pairs"] = json.dumps(update_pairs)
return self._request(endpoint, params, method="PUT", send_json=False)

def nodes_list(self, cluster_name: str = None) -> list[dict[str, any]]:
"""REST call to api/v1/clusters/nodes/list.
Expand Down Expand Up @@ -435,48 +396,6 @@ def jobs_one(
)
return super().jobs_one(**params)

def jobs_user_dict_update(
self,
job_id: str = None,
cluster_name: str = None,
update_pairs: dict = {},
target_self: bool = True,
) -> dict[str, any]:
"""REST call to api/v1/clusters/jobs/user_dict_update.
Updates the "user" component of a job that belongs to
the user calling this function. Allows for any number
of fields to be set.
If there is no ambiguity with the job_id,
the cluster_name argument is not required.
Fails when the job does not belong to the user,
or when the job is not yet in the database
(such as during delays between the creation of the job
and its presence in the database).
Args:
job_id (str): job_id to be described (in terms of Slurm terminology)
cluster_name (str): Name of cluster where that job is running.
update_pairs (dict): Updates to perform to the user dict.
target_self (bool): Inside a Slurm job, automatically infer arguments
to target this specific Slurm job.
Returns:
dict[any,any]: Returns the updated user dict.
"""
params = self._create_params_for_request(
target_self=target_self, job_id=job_id, cluster_name=cluster_name
)
# Due to current constraints, we have to pass "update_pairs"
# as a string representing a structure in json.
# However, since the base class does the json.dump, we don't
# want to do it twice. We therefore have to show some restraint.
# params["update_pairs"] = json.dumps(update_pairs) # NOT THAT
params["update_pairs"] = update_pairs
return super().jobs_user_dict_update(**params)

def nodes_list(self, cluster_name: str = None) -> list[dict[str, any]]:
"""REST call to api/v1/clusters/nodes/list.
Expand Down
35 changes: 0 additions & 35 deletions clockwork_tools_test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,38 +89,3 @@ def unauthorized_mtclient_00(invalid_config_00, db_with_fake_data):
@pytest.fixture
def unauthorized_mtclient_01(invalid_config_01, db_with_fake_data):
return clockwork_tools.client.ClockworkToolsClient(**invalid_config_01)


"""
There is a test called `test_jobs_user_dict_update` in "test_mt_jobs.py"
and it requires a particular set of text fixtures compared to the other
tests from the same file. Its fixture `mtclient_student01` comes from "conftest.py"
in the same directory, which itself refers to a particular job that was
hardcoded to be inserted in the database (opposed to being inserted by "fake_data.json").
This is because we require additional permissions in order to modify the "user"
portion of a job in the database, which means that the usual test fixture
`mtclient(config, db_with_fake_data)` from "conftest.py" doesn't satisfy the requirements.
There is no guarantee that this `mtclient` fixture would indeed have any of
its own jobs present in the "fake_data.json" used to populate the database initially.
That explains the acrobatics done in the file "insert_hardcoded_values.py".
"""


@pytest.fixture
def mtclient_student01(db_with_fake_data):
"""
This is a little too "hardcoded" in terms of featuring
data straight from fake_data.json added manually here,
but we need something like that to test `jobs_user_dict_update`.
See more detailed explanation above in the source file.
"""
return clockwork_tools.client.ClockworkToolsClient(
**{
"host": os.environ["clockwork_tools_test_HOST"],
"port": os.environ["clockwork_tools_test_PORT"],
"email": "[email protected]",
"clockwork_api_key": "000aaa01",
}
)
36 changes: 0 additions & 36 deletions clockwork_tools_test/test_mt_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,39 +57,3 @@ def test_list_jobs_for_a_given_random_user(mtclient, fake_data):
validator, username = helper_list_jobs_for_a_given_random_user(fake_data)
LD_jobs = mtclient.jobs_list(username=username)
validator(LD_jobs)


def test_jobs_user_dict_update(mtclient_student01):
"""
Verify that we can modify that particular job for that user.
We omit the `fake_data` test fixture because we hardcode
certain details by hand.
Refer to the code for test fixture `mtclient_student01`
to see a longer discussion about the particular needs of this test.
"""

# set two fields and make sure they are read back properly
expected = {"pet_name": "fido", "nbr_cats": 10}
#
updated_user_dict = mtclient_student01.jobs_user_dict_update(
job_id="284357",
cluster_name="graham",
update_pairs={"pet_name": "fido", "nbr_cats": 10},
)
assert updated_user_dict == expected
#
D_job = mtclient_student01.jobs_one(job_id="284357", cluster_name="graham")
assert D_job["user"] == expected

# update one field and add a new one, then make sure all three are as expected
expected = {"pet_name": "garfield", "nbr_cats": 10, "nbr_dinosaurs": 42}
#
updated_user_dict = mtclient_student01.jobs_user_dict_update(
job_id="284357",
cluster_name="graham",
update_pairs={"pet_name": "garfield", "nbr_dinosaurs": 42},
)
assert updated_user_dict == expected
#
D_job = mtclient_student01.jobs_one(job_id="284357", cluster_name="graham")
assert D_job["user"] == expected
14 changes: 0 additions & 14 deletions clockwork_web/core/jobs_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,20 +339,6 @@ def get_jobs(
)


def update_job_user_dict(mongodb_filter: dict, new_user_dict: dict):
"""
This is a step that happens after every checks have been made.
It's the "now we actually do it" part of the sequence of operations.
`mongodb_filter` is to identify a job uniquely
`new_user_dict` is the value to replace the "user" field with
"""
mc = get_db()[current_app.config["MONGODB_DATABASE_NAME"]]
return mc["jobs"].update_one(
mongodb_filter, {"$set": {"user": new_user_dict}}, upsert=False
)


def strip_artificial_fields_from_job(D_job):
# Returns a copy. Does not mutate the original.
fields_to_remove = ["_id"]
Expand Down
159 changes: 0 additions & 159 deletions clockwork_web/rest_routes/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,162 +260,3 @@ def route_user_props_delete():
# Delete props, using current_user_id as mila email username.
delete_user_props(job_id, cluster_name, keys, current_user_id)
return jsonify("")


# Note that this whole `user_dict_update` thing needs to be rewritten
# in order to use Olivier's proposal about jobs properties
# being visible only to the users that set them,
# and where everyone can set properties on all jobs.


@flask_api.route("/jobs/user_dict_update", methods=["PUT"])
@authentication_required
def route_api_v1_jobs_user_dict_update():
"""
Performs an update to the user dict for a given job.
The request needs to contain "job_id" and "cluster_name"
to identify the job to be updated, and then it needs to contain
"update_pairs" which is a list of (k, v), as a json string,
that we should update in the `job["user"]` dict.
The reason for passing "update_pairs" as a string is that
Flask received it as `werkzeug.datastructures.FileStorage`
when it was sent as a dict instead of a string.
User info is in `g.current_user_with_rest_auth`.
Returns the updated user dict.
.. :quickref: update the user dict for a given Slurm job that belongs to the user
"""
# Retrieve the authentified user
current_user_id = g.current_user_with_rest_auth["mila_email_username"]
current_user = User.get(current_user_id)

logging.info(
f"clockwork REST route: /jobs/user_dict_update - current_user_with_rest_auth={current_user_id}"
)

# A 'PUT' method is generally to modify resources.
# Retrieve the provided job ID
job_id = request.values.get("job_id", None)
if job_id is None:
return jsonify("Missing argument job_id."), 400 # bad request

# Retrieve the provided cluster names
requested_cluster_names = get_custom_array_from_request_args(
request.args.get("cluster_name")
)
if len(requested_cluster_names) < 1:
# If no cluster has been requested, then all clusters have been requested
# (a filter related to which clusters are available to the current user
# is then applied)
requested_cluster_names = get_all_clusters()

# Limit the cluster options to the clusters the user can access
user_clusters = (
current_user.get_available_clusters()
) # Retrieve the clusters the user can access
cluster_names = [
cluster for cluster in requested_cluster_names if cluster in user_clusters
]

# If cluster_names is empty, then the user does not have access to any cluster (s)he
# requested. Thus, an error is returned
if len(cluster_names) < 1:
return (
jsonify("It seems you have no access to the requested cluster(s)."),
403,
)

# Note that we'll have to add some more fields when we make progress in CW-93.
# We are going to have to check for "array_job_id" and "array_task_id"
# if those are supplied to identify jobs in situations where the "job_id"
# are "cluster_name" are insufficient.
(LD_jobs, _) = get_jobs(job_ids=[job_id], cluster_names=cluster_names)
# Note that `filter` gets reused later to commit again to the database.

if len(LD_jobs) == 0:
# Note that, if we wanted to have "phantom entries" in some other collection,
# before the the jobs are present in the database, then we would need to adjust
# this code branch to allow for updates even when no such jobs are currently found.
return jsonify("Job not found in database."), 404
if len(LD_jobs) > 1:
resp = (
jsonify(
f"Found more than one job matching the criteria. This is probably not what was intended."
),
500,
)
return resp

D_job = LD_jobs[0]

assert (
hasattr(g, "current_user_with_rest_auth")
and g.current_user_with_rest_auth is not None
), "Authentication should have failed much earlier than this."

# Make use of `current_user_with_rest_auth`.
# If the user requesting the update is not the owner,
# then we refuse the update and return an error
# that describes the problem.

for key in ["mila_email_username", "mila_cluster_username", "cc_account_username"]:
# Be as strict as possible here. If the job entry
# contains any of the three types of usernames (and it's not `None`),
# than it must be matched against that of the user
# authenticated and submitting the request to modify
# the user_dict. Usernames that are `None` but disagree
# with each other will not cause failed matches.
if D_job["cw"].get(key, None):
if D_job["cw"][key] != g.current_user_with_rest_auth[key]:
return (
jsonify(
f'This job belongs to {D_job["cw"][key]} and not {g.current_user_with_rest_auth[key]}.'
),
403, ## response code for "authorization denied"
)

update_pairs = request.values.get("update_pairs", None)
if isinstance(update_pairs, dict):
# This won't happen, but it would be nice if that were possible
# instead of getting a `FileStorage` in Flask.
pass
elif update_pairs is None:
return jsonify(f"Missing 'update_pairs' from arguments."), 500
elif isinstance(update_pairs, str):
try:
update_pairs = json.loads(update_pairs)
except Exception as inst:
return jsonify(f"Failed to json.loads(update_pairs). \n{inst}."), 500
else:
return (
jsonify(
f"Field 'update_pairs' was not a string (encoding a json structure). {update_pairs}"
),
500,
)

new_user_dict = D_job["user"] | update_pairs
# We do an update with the database if we have an empty list
# for `update_pairs`, for the sake of more predictable behavior.

# We could reuse `filter` because we might as well refer to the job
# by its "_id" instead since we have it. Maybe this can mitigate the
# unlikely risk of `filter`` returning more matches than a moment ago
# because updates were made in-between.
# In any case, with the current setup we are still exposed to the
# possibility that rapid updates to the same job could compete with
# each other (and that's kinda fine).
mc = get_db()
result = mc["jobs"].update_one(
{"_id": D_job["_id"]}, {"$set": {"user": new_user_dict}}, upsert=False
)

# See "https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html"
# for the properties of the returned object.
if result.modified_count == 1:
return jsonify(new_user_dict), 200
else:
# Will that return a useful string as an error?
return jsonify(f"Problem during update of the user dict. {result}"), 500
Loading

0 comments on commit 7ab61e7

Please sign in to comment.