Skip to content

Commit c1550ff

Browse files
committed
[pam_oar_adopt] Rework again
1 parent 5cb0364 commit c1550ff

File tree

1 file changed

+49
-35
lines changed

1 file changed

+49
-35
lines changed

sources/core/tools/oarsh/pam_oar_adopt

+49-35
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
#!/bin/bash
22
#
3-
# pam_oar_adopt is a PAM module that adopts processes launched under ssh
4-
# connections made by users. The processes will be moved inside the correct
5-
# job cgroup, if the user owns all cores of a node in one OAR job.
6-
# If user has multiple jobs on node or one job with only a part of available
7-
# cores, an error is thrown. In that case, `oarsh` must be used.
3+
# pam_oar_adopt is an OAR PAM script to use with the pam_exec and pam_env
4+
# modules.
5+
#
6+
# pam_oar_adopt adopts processes created upon ssh connections if a valid OAR
7+
# job exists for the user. The processes are moved in the correct OAR job
8+
# cgroup and the OAR jobs environment variables are set. This is done only if
9+
# the user owns all compute resources of the host in one and only one OAR job.
10+
# In other cases, the ssh connection is refused (exit code 1): using oarsh
11+
# becomes necessary.
812
#
913
set -e
1014

@@ -13,44 +17,52 @@ OAR_CGROUP_BASE="$CGROUP_MOUNT_POINT/oar.slice"
1317
USER_UID_MIN=1000
1418

1519
get_oar_cgroups_of_user() {
16-
USER_UID=$(id -u "$1")
17-
readarray -t OAR_SLICES < <( cd "$OAR_CGROUP_BASE" && ls -d "oar-u$USER_UID.slice/oar-u$USER_UID"-j*.slice 2>/dev/null )
18-
OAR_SLICE=${OAR_SLICES[0]}
19-
}
20-
21-
pam_account() {
22-
pam_oar_adopt_enabled_or_exit
20+
# Exit if the PAM service is not sshd (e.g. su, su-l, sudo, sudo-i, ...)
21+
if [ "$PAM_SERVICE" != "sshd" ]; then
22+
exit 0
23+
fi
2324

2425
OAR_USER="${PAM_RUSER:-$PAM_USER}"
26+
2527
if [ -z "$OAR_USER" ]; then
2628
echo "Please launch this module via PAM." 1>&2
2729
exit 1
2830
fi
2931

30-
# We exit if the pam service is su, we don't want to have the error
31-
# message when using su.
32-
if [ "$PAM_SERVICE" = "su-l" ]; then
33-
exit 0
34-
fi
32+
readarray -d: -t PASSWD_ENT < <(getent passwd "$OAR_USER")
33+
USER_UID=${PASSWD_ENT[2]}
3534

3635
# Exit if the user id is lower than 1000 (system user), since there is
3736
# no need to do OAR cgroups machinery in that case.
38-
if [ "$(getent passwd "$OAR_USER" | cut -d: -f3)" -lt "$USER_UID_MIN" ]; then
37+
if [ "$USER_UID" -lt "$USER_UID_MIN" ]; then
3938
exit 0
4039
fi
4140

42-
get_oar_cgroups_of_user "$OAR_USER"
41+
# Exit if oar.slice does not exist (job_resource_manager did not run yet, no job has run since the last reboot)
42+
if [ ! -d "$OAR_CGROUP_BASE" ]; then
43+
cat <<EOF 1>&2
44+
No running job found for $OAR_USER on this node.
45+
46+
EOF
47+
exit 1
48+
fi
49+
50+
readarray -t OAR_SLICES < <( cd "$OAR_CGROUP_BASE" && ls -d "oar-u$USER_UID.slice/oar-u$USER_UID"-j*.slice 2>/dev/null )
51+
OAR_SLICE=${OAR_SLICES[0]}
52+
}
53+
54+
pam_account() {
55+
pam_oar_adopt_enabled_or_exit
56+
57+
get_oar_cgroups_of_user
4358

44-
# Four cases:
45-
# - the connecting user is oar or root, we fail silently (since we are in 'sufficient' mode)
59+
# Three cases:
4660
# - the user has no cgroups (= no jobs) on node
4761
# - the user has more than one cgroup or one but without all cores
4862
# - the user has one cgroup with all cores
49-
if [ "$OAR_USER" = "oar" ] || [ "$OAR_USER" = "root" ]; then
50-
exit 1
51-
elif [ -z "$OAR_SLICE" ]; then
63+
if [ -z "$OAR_SLICE" ]; then
5264
cat <<EOF 1>&2
53-
No running job found for user $OAR_USER on this node.
65+
No running job found for $OAR_USER on this node.
5466
5567
EOF
5668
exit 1
@@ -76,15 +88,10 @@ EOF
7688
}
7789

7890
pam_session() {
79-
if [ ! -d /var/lib/oar ]; then
80-
echo "OAR directory not found: /var/lib/oar." 1>&2
81-
exit 1
82-
fi
83-
84-
rm -f /var/lib/oar/pam.env
85-
8691
pam_oar_adopt_enabled_or_exit
8792

93+
get_oar_cgroups_of_user
94+
8895
if [ -z "$PAM_TYPE" ]; then
8996
echo "Please launch this module via PAM." 1>&2
9097
exit 1
@@ -95,20 +102,27 @@ pam_session() {
95102
exit 0
96103
fi
97104

98-
OAR_USER="${PAM_RUSER:-$PAM_USER}"
99-
get_oar_cgroups_of_user "$OAR_USER"
100-
101105
# We could not find a running OAR job for this user on this node. It probably means that
102106
# the user connecting is either root or oar (for example because of oarsh).
103107
# We do nothing in that case.
104108
if [ -z "$OAR_SLICE" ]; then
105109
exit 0
106110
fi
107111

112+
if [ ! -d /var/lib/oar ]; then
113+
echo "OAR directory not found: /var/lib/oar." 1>&2
114+
exit 1
115+
fi
116+
108117
# To have the job environment variables, we create a symlink to the already
109118
# created job environment file and let pam_env load it.
110119
OAR_JOB_ENV=${OAR_SLICE%.slice}
111120
OAR_JOB_ENV=/var/lib/oar/${OAR_USER}_${OAR_JOB_ENV#*-j}.env
121+
if [ ! -e "$OAR_JOB_ENV" ]; then
122+
echo "Could not find job env file." 1>&2
123+
exit 1
124+
fi
125+
112126
ln -fs "$OAR_JOB_ENV" /var/lib/oar/pam.env
113127

114128
readarray -t PIDS < <(ps -o ppid= $$)

0 commit comments

Comments
 (0)