# deploy_spark_playbook.yml
# (forked from johandahlberg/ansible_spark_openstack)
---
# ------------------------
# Deploy the general stuff
# ------------------------
- hosts: all
  become: yes
  vars:
    ssh_public_key_content: "{{ lookup('file', ssh_public_key) }}"
  vars_files:
    - vars/main.yml
  pre_tasks:
    - name: update APT cache
      apt: update_cache=yes
  tasks:
    # General tasks
    - name: install java
      apt: name=openjdk-7-jre state=present update_cache=yes
    - name: disable IPv6 on all interfaces (net.ipv6.conf.all.disable_ipv6=1)
      sysctl: name=net.ipv6.conf.all.disable_ipv6 value=1 state=present
    - name: disable IPv6 on new interfaces (net.ipv6.conf.default.disable_ipv6=1)
      sysctl: name=net.ipv6.conf.default.disable_ipv6 value=1 state=present
    - name: disable IPv6 on loopback (net.ipv6.conf.lo.disable_ipv6=1)
      sysctl: name=net.ipv6.conf.lo.disable_ipv6 value=1 state=present
    - name: distribute hosts file
      template: src=templates/hosts.j2 dest=/etc/hosts
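    # The hosts.j2 template is expected to give every node a name -> IP mapping
    # for all cluster members, so the nodes can resolve each other by hostname.
    # A minimal sketch of such a template (an assumption -- the template
    # shipped with this repo may differ):
    #
    #   127.0.0.1 localhost
    #   {% for host in groups['all'] %}
    #   {{ hostvars[host]['ansible_default_ipv4']['address'] }} {{ hostvars[host]['ansible_hostname'] }}
    #   {% endfor %}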
    # Install hadoop
    - name: create hadoop group
      group: name=hadoop state=present
    - name: create hadoop user
      user: name={{ hadoop_user }} comment="Hadoop user" group=hadoop shell=/bin/bash
    - name: get hadoop
      get_url:
        url=http://apache.rediris.es/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
        dest=/usr/local/
        sha256sum=D489DF3808244B906EB38F4D081BA49E50C4603DB03EFD5E594A1E98B09259C2
    - name: unpack hadoop
      unarchive: copy=no src=/usr/local/hadoop-2.7.3.tar.gz dest=/usr/local/ owner={{ hadoop_user }} group=hadoop
    - name: set owner and privileges on hadoop
      file: path=/usr/local/hadoop-2.7.3 owner={{ hadoop_user }} group=hadoop recurse=yes
    - name: distribute hadoop-env.sh
      template: src=templates/hadoop-env.sh.j2 dest=/usr/local/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
    - name: distribute hadoop core-site.xml
      template: src=templates/core-site.xml.j2 dest=/usr/local/hadoop-2.7.3/etc/hadoop/core-site.xml
    - name: distribute hadoop hdfs-site.xml
      template: src=templates/hdfs-site.xml.j2 dest=/usr/local/hadoop-2.7.3/etc/hadoop/hdfs-site.xml
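    # core-site.xml.j2 is what points every node at the NameNode on the
    # master. A minimal sketch of its key property (an assumption -- the
    # actual template may carry more settings, and 9000 is only the
    # conventional Hadoop 2.x port):
    #
    #   <configuration>
    #     <property>
    #       <name>fs.defaultFS</name>
    #       <value>hdfs://{{ hostvars[groups['spark-master'][0]]['ansible_hostname'] }}:9000</value>
    #     </property>
    #   </configuration>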
    # Set up ssh for the normal user and the hadoop user
    - name: deploy authorized keys
      authorized_key: user={{ item }} key="{{ ssh_public_key_content }}"
      with_items:
        - "{{ user }}"
        - "{{ hadoop_user }}"
    - name: deploy ssh keys
      copy: src={{ ssh_keys_to_use }} dest=/home/{{ item }}/.ssh/
      with_items:
        - "{{ user }}"
        - "{{ hadoop_user }}"
    - name: distribute ssh config
      template: src=templates/config.j2 dest=/home/{{ item }}/.ssh/config
      with_items:
        - "{{ user }}"
        - "{{ hadoop_user }}"
    # Spark stuff
    - name: download spark
      get_url:
        url=http://d3kbcqa49mib13.cloudfront.net/spark-2.0.2-bin-hadoop2.7.tgz
        dest=/opt/
        sha256sum=e6349dd38ded84831e3ff7d391ae7f2525c359fb452b0fc32ee2ab637673552a
    - name: create spark group
      group: name=spark state=present
    - name: unpack spark
      unarchive: copy=no src=/opt/spark-2.0.2-bin-hadoop2.7.tgz dest=/opt
    - name: set owner and privileges on spark
      file: path=/opt/spark-2.0.2-bin-hadoop2.7 owner={{ hadoop_user }} group=spark recurse=yes
    - name: deploy slaves configuration
      template: src=templates/slaves.j2 dest=/opt/spark-2.0.2-bin-hadoop2.7/conf/slaves
    - name: deploy spark-env.sh configuration
      template: src=templates/spark-env.sh.j2 dest=/opt/spark-2.0.2-bin-hadoop2.7/conf/spark-env.sh
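    # spark-env.sh.j2 is where per-node Spark environment settings live. A
    # minimal sketch (an assumption -- the actual template may set more, such
    # as worker memory, and the JAVA_HOME path is the Ubuntu amd64 default
    # for openjdk-7):
    #
    #   export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
    #   export SPARK_MASTER_IP={{ hostvars[groups['spark-master'][0]]['ansible_hostname'] }}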
# --------------------------------------------------
# Start the hadoop master
# --------------------------------------------------
- hosts: spark-master
  vars_files:
    - vars/main.yml
  tasks:
    - name: distribute hadoop masters configuration file
      template: src=templates/masters.j2 dest=/usr/local/hadoop-2.7.3/etc/hadoop/masters
    - name: distribute hadoop slaves configuration file
      template: src=templates/slaves.j2 dest=/usr/local/hadoop-2.7.3/etc/hadoop/slaves
    - name: format hdfs (unless it has already been done)
      command: /usr/local/hadoop-2.7.3/bin/hadoop namenode -format creates=/usr/local/hadoop-2.7.3/ansible-format-hdfs
    - name: touch hdfs-formatted marker file (indicates that hdfs has been formatted)
      file: state=touch path=/usr/local/hadoop-2.7.3/ansible-format-hdfs
    - name: add hadoop and spark binaries to path for hadoop user
      lineinfile:
        dest=/home/{{ hadoop_user }}/.bashrc
        state=present insertafter=EOF
        line="export PATH=$PATH:/usr/local/hadoop-2.7.3/bin/:/opt/spark-2.0.2-bin-hadoop2.7/bin/"
        create=yes
      tags:
        - current
    - name: stop hadoop (if running)
      command: /usr/local/hadoop-2.7.3/sbin/stop-dfs.sh
    - name: start hadoop
      command: /usr/local/hadoop-2.7.3/sbin/start-dfs.sh
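    # Once start-dfs.sh has run, HDFS health can be checked by hand from the
    # master (a manual check as the hadoop user, not part of the play):
    #
    #   /usr/local/hadoop-2.7.3/bin/hdfs dfsadmin -report
    #   jps    # should list a NameNode here and a DataNode on each slave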
# --------------------------------------------------
# Kick off spark (making the master start the slaves)
# --------------------------------------------------
- hosts: spark-master
  tasks:
    - name: stop spark master (if running)
      command: /opt/spark-2.0.2-bin-hadoop2.7/sbin/stop-master.sh
    - name: start spark master
      shell: SPARK_MASTER_IP="{{ ansible_hostname }}" /opt/spark-2.0.2-bin-hadoop2.7/sbin/start-master.sh
    - name: stop the slaves (if running)
      shell: /opt/spark-2.0.2-bin-hadoop2.7/sbin/stop-slaves.sh
    - name: start the slaves
      shell: /opt/spark-2.0.2-bin-hadoop2.7/sbin/start-slaves.sh
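    # After the playbook completes, the cluster can be smoke-tested from the
    # master node (a manual check, not part of the play; the examples jar
    # path is the one shipped inside the Spark 2.0.2 tarball):
    #
    #   /opt/spark-2.0.2-bin-hadoop2.7/bin/spark-submit \
    #     --master spark://$(hostname):7077 \
    #     --class org.apache.spark.examples.SparkPi \
    #     /opt/spark-2.0.2-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.0.2.jar 100
    #
    # The Spark master web UI should also be reachable on port 8080 of the master.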