# docker-compose.yml
# Compose stack: graph-database bootstrap job, visualization front ends
# (lodmilla, ld-r), the provesense API, Blazegraph, Kafka + ZooKeeper, and a
# Spark master with three workers for streaming ingestion.
version: '2'

services:
  # For bootstrapping the graph database
  provesense:
    build: .
    environment:
      SPARQL_ENDPOINT: http://blazegraph:9999/blazegraph/sparql
    volumes:
      # - .:/code
      - ./bootstrap:/tmp/bootstrap
    depends_on:
      - blazegraph
      # - redis
    # restart: always

  # viz
  lodmilla:
    image: scambria/lodmilla-frontend:provesense
    ports:
      - "9998:80"

  ld-r:
    image: scambria/ld-r
    ports:
      - "4000:4000"
    volumes:
      - ./ld-r-configs:/ld-r/configs
      - ./ld-r-services:/ld-r/services
    restart: always

  # API
  provesense-api:
    image: scambria/provesense-api:latest
    ports:
      - "4567:4567"
    environment:
      SPARQL_ENDPOINT: http://blazegraph:9999/blazegraph/sparql

  # Graph database; the other services reach it through SPARQL_ENDPOINT
  # (http://blazegraph:9999/blazegraph/sparql).
  blazegraph:
    image: nicholsn/docker-blazegraph
    ports:
      - "9999:9999"
    volumes:
      - .:/tmp/blazegraph
      - ./bootstrap:/tmp/bootstrap

  # Kafka + ZooKeeper
  kafka:
    image: spotify/kafka
    ports:
      - "9092:9092"
      - "2181:2181"
    environment:
      - ADVERTISED_HOST=kafka
      - ADVERTISED_PORT=9092
    restart: always

  # Spark streaming master + 3 workers for ingestion
  master:
    image: scambria/provesense-ingest
    # TODO: submit job when container starts; this approach fails
    # command: bin/spark-class org.apache.spark.deploy.master.Master -h master \
    #   && bin/spark-submit --jars \
    #   | ./jars/spark-streaming-kafka-0-8-assembly_2.11-2.1.0-SNAPSHOT.jar \
    #   | python/direct_stream.py kafka:9092 provesense.inbound
    command: bin/spark-class org.apache.spark.deploy.master.Master -h master
    hostname: master
    environment:
      MASTER: spark://master:7077
      SPARK_CONF_DIR: /conf
      SPARK_PUBLIC_DNS: localhost
      SPARQL_ENDPOINT: http://blazegraph:9999/blazegraph/sparql
      LOGGER_NAME: provesense-spark
      LOG_DIR: /var/log/provesense
      LOG_FILE: provesense-spark.log
      # Quoted so the values stay the strings 'False' / 'True' instead of
      # being parsed as YAML booleans.
      LOG_STDOUT: 'False'
      LOG_JSON: 'True'
      LOG_LEVEL: DEBUG
    expose:
      - "7001"
      - "7002"
      - "7003"
      - "7004"
      - "7005"
      - "7006"
      - "7077"
      - "6066"
    ports:
      - "4040:4040"
      - "6066:6066"
      - "7077:7077"
      - "8080:8080"
    volumes:
      - ./data:/tmp/data
      - ./ingest:/tmp/ingest
      - ./spark:/usr/app
    depends_on:
      - kafka
      - blazegraph
    restart: always

  # Workers 1-3 share the same image, command, and settings; they differ only
  # in hostname and in the host port that publishes the worker web UI
  # (container port 8081, per SPARK_WORKER_WEBUI_PORT).
  worker1:
    image: scambria/provesense-ingest
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
    hostname: worker1
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: '2'
      SPARK_WORKER_MEMORY: 1g
      SPARK_WORKER_PORT: '8881'
      SPARK_WORKER_WEBUI_PORT: '8081'
      SPARK_PUBLIC_DNS: localhost
      SPARQL_ENDPOINT: http://blazegraph:9999/blazegraph/sparql
    links:
      - master
    expose:
      - "7012"
      - "7013"
      - "7014"
      - "7015"
      - "7016"
      - "8881"
    ports:
      - "8081:8081"
    volumes:
      - ./data:/tmp/data
      - ./logs:/usr/spark-2.0.1/work
    depends_on:
      - master
    restart: always

  worker2:
    image: scambria/provesense-ingest
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
    hostname: worker2
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: '2'
      SPARK_WORKER_MEMORY: 1g
      SPARK_WORKER_PORT: '8881'
      SPARK_WORKER_WEBUI_PORT: '8081'
      SPARK_PUBLIC_DNS: localhost
      SPARQL_ENDPOINT: http://blazegraph:9999/blazegraph/sparql
    links:
      - master
    expose:
      - "7012"
      - "7013"
      - "7014"
      - "7015"
      - "7016"
      - "8881"
    ports:
      - "8082:8081"
    volumes:
      - ./data:/tmp/data
      - ./logs:/usr/spark-2.0.1/work
    depends_on:
      - master
    restart: always

  worker3:
    image: scambria/provesense-ingest
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
    hostname: worker3
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_WORKER_CORES: '2'
      SPARK_WORKER_MEMORY: 1g
      SPARK_WORKER_PORT: '8881'
      SPARK_WORKER_WEBUI_PORT: '8081'
      SPARK_PUBLIC_DNS: localhost
      SPARQL_ENDPOINT: http://blazegraph:9999/blazegraph/sparql
    links:
      - master
    expose:
      - "7012"
      - "7013"
      - "7014"
      - "7015"
      - "7016"
      - "8881"
    ports:
      - "8083:8081"
    volumes:
      - ./data:/tmp/data
      - ./logs:/usr/spark-2.0.1/work
    depends_on:
      - master
    restart: always

  # redis:
  #   image: redis

  # ELK for logging
  # May need to run `sudo sysctl -w vm.max_map_count=262144` on the host
  # prior to booting.
  # elasticsearch:
  #   image: elasticsearch:latest
  #   command: elasticsearch
  #   ports:
  #     - "9200:9200"
  #     - "9300:9300"
  #   restart: always
  # logstash:
  #   image: logstash:latest
  #   command: logstash -f /etc/logstash/conf.d/logstash.conf
  #   volumes:
  #     - ./logs:/tmp/logs
  #     - ./elk/spark-logstash.conf:/etc/logstash/conf.d/logstash.conf
  #     - ./elk/logs-template.json:/etc/logstash/templates/logs-template.json
  #   ports:
  #     - "5000:5000"
  #   depends_on:
  #     - elasticsearch
  #   restart: always
  # kibana:
  #   image: kibana:latest
  #   volumes:
  #     - ./elk:/opt/kibana/config/
  #   ports:
  #     - "5601:5601"
  #   depends_on:
  #     - elasticsearch
  #   restart: always