run_config.py
"""Populate the Hadoop site XML files under $HADOOP_HOME/etc/hadoop/ with the
properties gen3_spark needs, reading host and path settings from
gen3_spark.settings."""

import xml.etree.ElementTree as et

import gen3_spark.settings as config

CONFIG_PATH = '{}/etc/hadoop/'.format(config.HADOOP_HOME)


def indent(elem, level=0):
    """Indent the element tree in place so the written XML is readable."""
    i = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for elem in elem:
            indent(elem, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i


def configure_core_site():
    """Append the Hadoop temp directory and default filesystem URL to core-site.xml."""
    core_site_path = '{}core-site.xml'.format(CONFIG_PATH)
    tree = et.parse(core_site_path)
    root = tree.getroot()
    root.append(create_property('hadoop.tmp.dir', '{}/hdfs/tmp'.format(config.HADOOP_HOME)))
    root.append(create_property('fs.default.name', config.HADOOP_URL))
    indent(root)
    tree.write(core_site_path)


def configure_hdfs_site():
    """Append block size, storage paths, bind hosts, and hostname/permission flags to hdfs-site.xml."""
    hdfs_site_path = '{}hdfs-site.xml'.format(CONFIG_PATH)
    tree = et.parse(hdfs_site_path)
    root = tree.getroot()
    root.append(create_property('dfs.blocksize', '268435456'))
    root.append(create_property('dfs.hosts', ''))
    root.append(create_property('dfs.namenode.handler.count', '100'))
    root.append(create_property('dfs.namenode.name.dir', '/hadoop/hdfs/data/dfs/namenode'))
    root.append(create_property('dfs.namenode.data.dir', '/hadoop/hdfs/data/dfs/datanode'))
    root.append(create_property('dfs.namenode.http-bind-host', config.HADOOP_HOST))
    root.append(create_property('dfs.namenode.https-bind-host', config.HADOOP_HOST))
    root.append(create_property('dfs.client.use.datanode.hostname', 'true'))
    root.append(create_property('dfs.datanode.use.datanode.hostname', 'true'))
    root.append(create_property('dfs.permissions', 'false'))
    indent(root)
    tree.write(hdfs_site_path)


def configure_yarn_site():
    """Append the shuffle service and ResourceManager addresses to yarn-site.xml."""
    yarn_site_path = '{}yarn-site.xml'.format(CONFIG_PATH)
    tree = et.parse(yarn_site_path)
    root = tree.getroot()
    root.append(create_property('yarn.nodemanager.aux-services', 'mapreduce_shuffle'))
    root.append(create_property('yarn.resourcemanager.scheduler.address', '{}:8030'.format(config.HADOOP_HOST)))
    root.append(create_property('yarn.resourcemanager.resource-tracker.address', '{}:8031'.format(config.HADOOP_HOST)))
    root.append(create_property('yarn.resourcemanager.address', '{}:8032'.format(config.HADOOP_HOST)))
    tree.write(yarn_site_path)


def configure_mapred_site():
    """Point MapReduce at YARN and set its application classpath in mapred-site.xml."""
    mapred_site_path = '{}mapred-site.xml'.format(CONFIG_PATH)
    tree = et.parse(mapred_site_path)
    root = tree.getroot()
    root.append(create_property('mapreduce.framework.name', 'yarn'))
    root.append(create_property('mapreduce.application.classpath',
                                '$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:'
                                '$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*'))
    indent(root)
    tree.write(mapred_site_path)


def create_property(prop_name, prop_val):
    """Build a <property><name>...</name><value>...</value></property> element."""
    prop = et.Element('property')
    name = et.Element('name')
    name.text = prop_name
    value = et.Element('value')
    value.text = prop_val
    prop.append(name)
    prop.append(value)
    return prop


if __name__ == '__main__':
    configure_core_site()
    configure_hdfs_site()
    configure_mapred_site()
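
For reference, a minimal standalone sketch of the shape of one entry this script appends to a site file, built the same way create_property() does and serialized with ElementTree. The value shown is an illustrative placeholder, not a value read from gen3_spark.settings:

import xml.etree.ElementTree as et

# Build one <property> element and serialize it; ElementTree emits it on a
# single line until the script's indent() helper pretty-prints the tree.
prop = et.Element('property')
name = et.Element('name')
name.text = 'fs.default.name'
value = et.Element('value')
value.text = 'hdfs://namenode:9000'  # placeholder, stands in for config.HADOOP_URL
prop.append(name)
prop.append(value)
print(et.tostring(prop, encoding='unicode'))
# <property><name>fs.default.name</name><value>hdfs://namenode:9000</value></property>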