1
1
#! /bin/bash
2
2
#
3
- # pam_oar_adopt is a PAM module that adopts processes launched under ssh
4
- # connections made by users. The processes will be moved inside the correct
5
- # job cgroup, if the user owns all cores of a node in one OAR job.
6
- # If user has multiple jobs on node or one job with only a part of available
7
- # cores, an error is thrown. In that case, `oarsh` must be used.
3
+ # pam_oar_adopt is an OAR PAM script to use with the pam_exec and pam_env
4
+ # modules.
5
+ #
6
+ # pam_oar_adopt adopts processes created upon ssh connections if a valid OAR
7
+ # job exists for the user. The processes are moved in the correct OAR job
8
+ # cgroup and the OAR jobs environment variables are set. This is done only if
9
+ # the user owns all compute resources of the host in one and only one OAR job.
10
+ # In other cases, the ssh connection is refused (exit code 1): using oarsh
11
+ # becomes necessary.
8
12
#
9
13
set -e
10
14
@@ -13,44 +17,52 @@ OAR_CGROUP_BASE="$CGROUP_MOUNT_POINT/oar.slice"
13
17
USER_UID_MIN=1000
14
18
15
19
get_oar_cgroups_of_user () {
16
- USER_UID=$( id -u " $1 " )
17
- readarray -t OAR_SLICES < <( cd " $OAR_CGROUP_BASE " && ls -d " oar-u$USER_UID .slice/oar-u$USER_UID " -j* .slice 2> /dev/null )
18
- OAR_SLICE=${OAR_SLICES[0]}
19
- }
20
-
21
- pam_account () {
22
- pam_oar_adopt_enabled_or_exit
20
+ # Exit if the PAM service is not sshd (e.g. su, su-l, sudo, sudo-i, ...)
21
+ if [ " $PAM_SERVICE " != " sshd" ]; then
22
+ exit 0
23
+ fi
23
24
24
25
OAR_USER=" ${PAM_RUSER:- $PAM_USER } "
26
+
25
27
if [ -z " $OAR_USER " ]; then
26
28
echo " Please launch this module via PAM." 1>&2
27
29
exit 1
28
30
fi
29
31
30
- # We exit if the pam service is su, we don't want to have the error
31
- # message when using su.
32
- if [ " $PAM_SERVICE " = " su-l" ]; then
33
- exit 0
34
- fi
32
+ readarray -d: -t PASSWD_ENT < <( getent passwd " $OAR_USER " )
33
+ USER_UID=${PASSWD_ENT[2]}
35
34
36
35
# Exit if the user id is lower than 1000 (system user); indeed, there is
37
36
# no need to do OAR cgroups machinery in that case.
38
- if [ " $( getent passwd " $OAR_USER " | cut -d: -f3 ) " -lt " $USER_UID_MIN " ]; then
37
+ if [ " $USER_UID " -lt " $USER_UID_MIN " ]; then
39
38
exit 0
40
39
fi
41
40
42
- get_oar_cgroups_of_user " $OAR_USER "
41
+ # Exit if oar.slice does not exist (job_resource_manager did not run yet, no job has run since the last reboot)
42
+ if [ ! -d " $OAR_CGROUP_BASE " ]; then
43
+ cat << EOF 1>&2
44
+ No running job found for $OAR_USER on this node.
45
+
46
+ EOF
47
+ exit 1
48
+ fi
49
+
50
+ readarray -t OAR_SLICES < <( cd " $OAR_CGROUP_BASE " && ls -d " oar-u$USER_UID .slice/oar-u$USER_UID " -j* .slice 2> /dev/null )
51
+ OAR_SLICE=${OAR_SLICES[0]}
52
+ }
53
+
54
+ pam_account () {
55
+ pam_oar_adopt_enabled_or_exit
56
+
57
+ get_oar_cgroups_of_user
43
58
44
- # Four cases:
45
- # - the connecting user is oar or root, we fail silently (since we are in 'sufficient' mode)
59
+ # Three cases:
46
60
# - the user has no cgroups (= no jobs) on node
47
61
# - the user has more than one cgroup or one but without all cores
48
62
# - the user has one cgroup with all cores
49
- if [ " $OAR_USER " = " oar" ] || [ " $OAR_USER " = " root" ]; then
50
- exit 1
51
- elif [ -z " $OAR_SLICE " ]; then
63
+ if [ -z " $OAR_SLICE " ]; then
52
64
cat << EOF 1>&2
53
- No running job found for user $OAR_USER on this node.
65
+ No running job found for $OAR_USER on this node.
54
66
55
67
EOF
56
68
exit 1
76
88
}
77
89
78
90
pam_session () {
79
- if [ ! -d /var/lib/oar ]; then
80
- echo " OAR directory not found: /var/lib/oar." 1>&2
81
- exit 1
82
- fi
83
-
84
- rm -f /var/lib/oar/pam.env
85
-
86
91
pam_oar_adopt_enabled_or_exit
87
92
93
+ get_oar_cgroups_of_user
94
+
88
95
if [ -z " $PAM_TYPE " ]; then
89
96
echo " Please launch this module via PAM." 1>&2
90
97
exit 1
@@ -95,20 +102,27 @@ pam_session() {
95
102
exit 0
96
103
fi
97
104
98
- OAR_USER=" ${PAM_RUSER:- $PAM_USER } "
99
- get_oar_cgroups_of_user " $OAR_USER "
100
-
101
105
# We could not find a running OAR job for this user on this node. It probably means that
102
106
# the user connecting is either root or oar (for example because of oarsh).
103
107
# We do nothing in that case.
104
108
if [ -z " $OAR_SLICE " ]; then
105
109
exit 0
106
110
fi
107
111
112
+ if [ ! -d /var/lib/oar ]; then
113
+ echo " OAR directory not found: /var/lib/oar." 1>&2
114
+ exit 1
115
+ fi
116
+
108
117
# To have the job environment variables, we create a symlink to the already
109
118
# created job environment file and let pam_env load it.
110
119
OAR_JOB_ENV=${OAR_SLICE% .slice}
111
120
OAR_JOB_ENV=/var/lib/oar/${OAR_USER} _${OAR_JOB_ENV#* -j} .env
121
+ if [ ! -e " $OAR_JOB_ENV " ]; then
122
+ echo " Could not find job env file." 1>&2
123
+ exit 1
124
+ fi
125
+
112
126
ln -fs " $OAR_JOB_ENV " /var/lib/oar/pam.env
113
127
114
128
readarray -t PIDS < <( ps -o ppid= $$ )
0 commit comments