-
Notifications
You must be signed in to change notification settings - Fork 1
/
start_lsf_ce.sh
243 lines (214 loc) · 7.36 KB
/
start_lsf_ce.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/bin/bash
#######################################
# Remember the log file path and make sure the file exists.
# Globals:   LOGFILE (written)
# Arguments: $1 - path of the log file
# Outputs:   an ERROR line on stdout when the file cannot be created
# Returns:   always 0; a creation failure is reported, not propagated
#######################################
init_log() {
  LOGFILE="$1"

  # Nothing to do when the file is already there.
  if [ -e "$LOGFILE" ]; then
    return 0
  fi

  if ! touch "$LOGFILE"; then
    echo "ERROR: failed to initial logging. can't create log file $LOGFILE"
  fi
}
#######################################
# Prepare the directory tree under $HOME_DIR that is shared between the
# master and the other hosts, and publish this host in the shared hosts file.
#
# master: appends its /etc/hosts entry to the shared hosts file, seeds the
#         shared conf/work directories from $LSF_TOP on first start (i.e.
#         when work/cluster1 is not there yet), then replaces $LSF_TOP/conf
#         and $LSF_TOP/work with symlinks into the shared tree.
# other:  rewrites $MYHOST to $LSF_MASTER_LIST in the local LSF config files,
#         waits until the shared hosts file exists, drops any line carrying
#         this host's IP, appends its own /etc/hosts entry and symlinks the
#         shared hosts file to $LSF_TOP/conf/hosts.
#
# Globals:   HOME_DIR, LSF_TOP, ROLE, MYHOST, LSF_MASTER_LIST (all read)
# Arguments: none
# Outputs:   progress messages through log_info while waiting
#######################################
function init_share_dir()
{
  local shared_conf="$HOME_DIR/lsf/conf"
  local shared_work="$HOME_DIR/lsf/work"
  local shared_hosts="$shared_conf/hosts"
  local conf_file

  # share the conf/work dir for recover
  mkdir -p "$shared_conf" "$shared_work" "$HOME_DIR/mariadb" "$HOME_DIR/tools"

  if [ "$ROLE" = "master" ]; then
    grep -- "$MYHOST" /etc/hosts >> "$shared_hosts"

    if [ ! -d "$shared_work/cluster1" ]; then
      cp -arp "$LSF_TOP"/conf/* "$shared_conf"
      cp -arp "$LSF_TOP"/work/* "$shared_work"
    fi

    # ${LSF_TOP:?} aborts instead of running "rm -rf /conf/" when LSF_TOP is
    # empty or unset.
    rm -rf "${LSF_TOP:?}/conf/" && ln -s "$shared_conf/" "$LSF_TOP/"
    rm -rf "${LSF_TOP:?}/work/" && ln -s "$shared_work/" "$LSF_TOP/"
  else
    # update master hostname
    for conf_file in \
      "$LSF_TOP/conf/lsf.cluster.cluster1" \
      "$LSF_TOP/conf/ego/cluster1/kernel/ego.conf" \
      "$LSF_TOP/conf/lsf.conf"; do
      sed -i "s/$MYHOST/$LSF_MASTER_LIST/g" "$conf_file"
    done

    # The master creates the shared hosts file; block until it shows up.
    until [ -e "$shared_hosts" ]; do
      sleep 2
      log_info "waiting for lsf master service startup ..."
    done

    # delete duplicate host
    sed -i "/\b$(hostname -i)\b/d" "$shared_hosts"
    grep -- "$MYHOST" /etc/hosts >> "$shared_hosts"
    ln -s "$shared_hosts" "$LSF_TOP/conf/hosts"
  fi
}
#######################################
# Print a timestamped message and append the same line to the log file.
# Globals:   LOGFILE (read)
# Arguments: the words of the message
# Outputs:   "<date> <message>" on stdout and appended to $LOGFILE
#######################################
log() {
  # $(date) is left unquoted on purpose: echo re-joins its words with single
  # spaces, which is the established line format.
  echo $(date) "$@" | tee -a "$LOGFILE"
}
# Log a message at INFO level: forwards "INFO:" followed by all arguments
# to log.
log_info() {
  set -- "INFO:" "$@"
  log "$@"
}
# Log a message at ERROR level: forwards "ERROR:" followed by all arguments
# to log.
log_error() {
  set -- "ERROR:" "$@"
  log "$@"
}
# Log a message at WARN level: forwards "WARN:" followed by all arguments
# to log.
log_warn() {
  set -- "WARN:" "$@"
  log "$@"
}
#######################################
# Regenerate /etc/hosts from a fixed loopback/IPv6 preamble followed by the
# content of the shared hosts file.
#
# update etc/hosts file so that no "HOST_NOT_FOUND" issue
# raised by pmpi, since pmpi depends on 'gethostbyname' get
# ip/hostname mapping
#
# Globals:   HOME_DIR (read)
# Arguments: none
# Outputs:   overwrites /etc/hosts
#######################################
update_etc_hosts() {
  local cluster_hosts

  # Command substitution strips trailing newlines from the shared file;
  # printf below puts exactly one back.
  cluster_hosts=$(cat "$HOME_DIR/lsf/conf/hosts")

  printf '%s\n' \
    '# Kubernetes-managed hosts file.' \
    '127.0.0.1 localhost' \
    '::1 localhost ip6-localhost ip6-loopback' \
    'fe00::0 ip6-localnet' \
    'fe00::0 ip6-mcastprefix' \
    'fe00::1 ip6-allnodes' \
    'fe00::2 ip6-allrouters' \
    "$cluster_hosts" \
    > /etc/hosts
}
#######################################
# Wait for the database on 127.0.0.1:3306, write the PAC datasource
# definition and, unless the schema is already present, create and populate
# the $DB_NAME database.
# Globals:   PAC_TOP, MYSQL_PASSWORD, DB_NAME (all read)
# Arguments: none
# Outputs:   progress through log_info; overwrites
#            $PAC_TOP/perf/conf/datasource.xml
# Returns:   early (status of the preceding log_info) when the PMC_USER table
#            can already be queried; otherwise the status of the last log_info
# NOTE(review): the root password is passed on the mysql command line
#   (-p$MYSQL_PASSWORD, unquoted) - it is therefore part of each mysql
#   process's argument list.
#######################################
function init_database()
{
# Poll until a TCP connection to port 3306 on localhost can be opened
# (bash /dev/tcp redirection); retry every 3 seconds, without a timeout.
while true; do
</dev/tcp/127.0.0.1/3306 && break
sleep 3
log_info "waiting for maria database service startup ..."
done
# Generate the datasource definition pointing at jdbc:mysql://127.0.0.1:3306/pac.
# NOTE(review): UserName/Password are written verbatim together with
#   Cipher="des56" - presumably already-encoded credentials; confirm.
(
cat << EOF
<?xml version="1.0" encoding="UTF-8"?>
<ds:DataSources xmlns:ds="http://www.ibm.com/perf/2006/01/datasource" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="http://www.ibm.com/perf/2006/01/datasource datasource.xsd">
<ds:DataSource Name="ReportDB"
Driver="org.gjt.mm.mysql.Driver"
Connection="jdbc:mysql://127.0.0.1:3306/pac"
Default="true"
Cipher="des56"
UserName="uOTzmooF4Qw="
Password="uOTzmooF4Qw=">
<ds:Properties>
<ds:Property>
<ds:Name>maxActive</ds:Name>
<ds:Value>30</ds:Value>
</ds:Property>
</ds:Properties>
</ds:DataSource>
</ds:DataSources>
EOF
) > $PAC_TOP/perf/conf/datasource.xml
log_info "check whether database already exists."
# Existence probe: a successful query against PMC_USER in $DB_NAME means the
# schema was loaded earlier, so the creation steps below are skipped.
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -D$DB_NAME -h127.0.0.1 -e "select count(1) from PMC_USER;"
if [ $? -eq 0 ]; then
log_info "pac database already exists."
return
fi
log_info "creating MYSQL database for Platform Application Center"
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -e "create database if not exists $DB_NAME default character set utf8 default collate utf8_bin;"
# NOTE(review): "[email protected]" in the GRANT below looks like an
#   e-mail-obfuscation artifact rather than a real account spec (user@host);
#   restore the intended value from the upstream script before relying on it.
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -e "GRANT ALL ON $DB_NAME.* TO [email protected] IDENTIFIED BY 'pacuser';"
# Load the schema and seed data files in this order.
# NOTE(review): none of the create/grant/import exit statuses are checked, so
#   the "is created" message below is logged even if a step failed.
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -D$DB_NAME < $PAC_TOP/perf/lsf/10.0/DBschema/MySQL/lsf_sql.sql
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -D$DB_NAME < $PAC_TOP/perf/ego/1.2/DBschema/MySQL/egodata.sql
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -D$DB_NAME < $PAC_TOP/perf/lsf/10.0/DBschema/MySQL/lsfdata.sql
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -D$DB_NAME < $PAC_TOP/gui/DBschema/MySQL/create_schema.sql
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -D$DB_NAME < $PAC_TOP/gui/DBschema/MySQL/create_pac_schema.sql
/usr/bin/mysql -uroot -p$MYSQL_PASSWORD -h127.0.0.1 -D$DB_NAME < $PAC_TOP/gui/DBschema/MySQL/init.sql
log_info "MYSQL database for Platform Application Center is created."
}
#######################################
# Load the LSF environment and start the LSF daemons on this host.
# Globals:   LSF_TOP, LOGFILE, ROLE, MYHOST (all read); whatever
#            $LSF_TOP/conf/profile.lsf sets is added to the current shell
# Arguments: none
# Outputs:   start/finish messages through log_info; the output of the
#            startup commands (stdout and stderr) is appended to $LOGFILE
#######################################
start_lsf() {
  log_info "Start LSF services on $ROLE host $MYHOST..."

  source "$LSF_TOP/conf/profile.lsf"

  # One redirection for the whole startup sequence.
  {
    lsadmin limstartup
    lsadmin resstartup
    badmin hstartup
  } >> "$LOGFILE" 2>&1

  log_info "LSF services on $ROLE host $MYHOST started."
}
#######################################
# Load the PAC environment and start the PAC services.
# Globals:   PAC_TOP, LOGFILE, ROLE, MYHOST (all read); whatever
#            $PAC_TOP/profile.platform sets is added to the current shell
# Arguments: none
# Outputs:   a start message through log_info; the output of perfadmin and
#            pmcadmin (stdout and stderr) is appended to $LOGFILE
# Returns:   the exit status of "pmcadmin start"
#######################################
start_pac() {
  log_info "Start PAC services on $ROLE host $MYHOST..."

  source "$PAC_TOP/profile.platform"

  {
    perfadmin start all
    pmcadmin start
  } >> "$LOGFILE" 2>&1
}
#######################################
# Create (or overwrite) the lock file with the single line "1".
# Globals:   LOCKFILE (read)
# Arguments: none
# Outputs:   a message through log_info; writes $LOCKFILE
#######################################
generate_lock() {
  log_info "generate lock file."
  printf '%s\n' 1 > "$LOCKFILE"
}
#######################################
# Adapt the configuration shipped in the image to the current host.
#   1. Read the host name recorded on the MASTER_LIST line of lsf.conf.
#   2. Replace that name with $MYHOST in every regular file under the LSF
#      logdir/conf trees, the PAC gui/perf conf trees and the rule-engine
#      config file.
#   3. Append host-cache / dynamic-host settings to lsf.conf.
#   4. Add active FLOAT_CLIENTS* lines after their commented-out templates
#      in lsf.cluster.cluster1.
# Globals:   LSF_TOP, PAC_TOP, MYHOST (read); IMAGE_HOST (written)
# Arguments: none
#######################################
config_lsfce() {
  local lsf_conf="$LSF_TOP/conf/lsf.conf"
  local cluster_conf="$LSF_TOP/conf/lsf.cluster.cluster1"
  local setting
  local -a search_roots=(
    "$LSF_TOP/work/cluster1/logdir"
    "$LSF_TOP/conf"
    "$PAC_TOP/gui/conf"
    "$PAC_TOP/perf/conf"
    "$PAC_TOP/rule-engine/conf/rule-engine-config.xml"
  )

  # the host name from base image
  IMAGE_HOST=$(awk -F'"' '/MASTER_LIST/ {print $(NF-1)}' "$lsf_conf")

  find "${search_roots[@]}" -type f -print0 \
    | xargs -0 sed -i "s/$IMAGE_HOST/$MYHOST/g"

  # make lsf read hosts file when new hosts added to cluster
  for setting in \
    "LSF_HOST_CACHE_NTTL=0" \
    "LSF_DHCP_ENV=y" \
    "LSF_HOST_CACHE_DISABLE=y" \
    "LSF_DYNAMIC_HOST_TIMEOUT=10m"; do
    echo "$setting"
  done >> "$lsf_conf"

  # enable floating client
  sed -i "/# FLOAT_CLIENTS_ADDR_RANGE=/a\FLOAT_CLIENTS_ADDR_RANGE=*.*.*.*" "$cluster_conf"
  sed -i "/# FLOAT_CLIENTS=/a\FLOAT_CLIENTS=10" "$cluster_conf"
}
############################### main ############################################
# Entry point of the script:
#   1. first start (no lock file): configure LSF for this host, initialise
#      the database on the master, set up the shared directory;
#   2. start LSF (and PAC on the master) and write the lock file;
#   3. stay in the foreground: exit 1 when no LIM process is found, and
#      regenerate /etc/hosts whenever the shared hosts file changes.
############## CMD parameter from docker run ##########
#lsf master or slave
ROLE=$1
# db root password
MYSQL_PASSWORD=$2
#lsf master host name
LSF_MASTER_LIST=$3
#######################################
MYHOST=$(uname -n)
HOME_DIR="/home/lsfadmin"
LSF_TOP="/opt/ibm/lsf"
PAC_TOP="/opt/ibm/pac"
LOGFILE="/tmp/start_lsf_ce_$MYHOST.log"
LOCKFILE="$LSF_TOP/lsf_ce_$MYHOST.lock"
DB_NAME="pac"
ETC_HOSTS_UPDATE_TIME_1=0

# Logged only now: log() tees into $LOGFILE, which has to be defined first.
# The password value is masked so that it does not end up in the log file.
log_info "CMD parameter: ROLE=$ROLE MYSQL_PASSWORD=****** LSF_MASTER_LIST=$LSF_MASTER_LIST"

if [ -f "$LOCKFILE" ]; then
  log_info "lock file exists in $LOCKFILE, just start LSF service."
else
  init_log "$LOGFILE"
  config_lsfce
  if [ "$ROLE" = "master" ]; then
    init_database
  fi
  init_share_dir
fi

start_lsf
if [ "$ROLE" = "master" ]; then
  start_pac
fi
generate_lock

# hang here now
while true; do
  # NOTE(review): "pgrep -f lim" matches any process whose command line
  # contains "lim", not only the LIM daemon - confirm this is intended.
  if ! pgrep -f lim > /dev/null; then
    log_error "LIM process has exited due to a fatal error."
    # Quoted so the log excerpt is neither word-split nor glob-expanded.
    log_error "$(tail -n 20 "$LSF_TOP/log/lim.log.$MYHOST")"
    exit 1
  fi
  echo "$(date) LSF is running -:) ..."

  # Change signature of the shared hosts file: mtime, size and inode. Size
  # and inode are included because appends and "sed -i" rewrites can land
  # within the same second as the previous check.
  ETC_HOSTS_UPDATE_TIME_2=$(stat -c '%Y %s %i' "$HOME_DIR/lsf/conf/hosts")
  # Skip the update when stat failed, so /etc/hosts is not regenerated from
  # an unreadable shared file.
  if [ -n "$ETC_HOSTS_UPDATE_TIME_2" ] \
    && [ "$ETC_HOSTS_UPDATE_TIME_1" != "$ETC_HOSTS_UPDATE_TIME_2" ]; then
    update_etc_hosts
    # Store the value, not the variable name, so the next iteration can
    # detect "unchanged".
    ETC_HOSTS_UPDATE_TIME_1=$ETC_HOSTS_UPDATE_TIME_2
  fi
  sleep 10
done