forked from IBM/data-prep-kit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
.make.transforms
354 lines (295 loc) · 15.3 KB
/
.make.transforms
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# Include this to get access to a common set of rules for reuse in transform projects.
# include ../../.make.transforms
#
# Before including, the following must be defined:
# REPOROOT points to the top of the git repository.
# For example REPOROOT=../../..
# TRANSFORM_NAME defines the name of the transform and is used to derive
# DOCKER_IMAGE_NAME and TRANSFORM_RUNTIME_SRC_FILE if not already defined.
# Optional inputs (that have defaults)
# TRANSFORM_RUNTIME_SRC_FILE is the base name of the python source file containing the main()
# that is used to launch the transform.
# By convention this name defaults to $(TRANSFORM_NAME)_transform_$(TRANSFORM_RUNTIME).py.
# This file is then assumed to be in the home dir of the docker image and
# can be run with "python $(TRANSFORM_RUNTIME_SRC_FILE) --help"
# DOCKER_IMAGE_NAME is the name of the docker image.
# By default its value is $(TRANSFORM_NAME)-$(TRANSFORM_RUNTIME).
#
# Targets defined here use double colon so they can be overridden
#
# Reusable rules begin with '.'. To reuse without modification, for example,
# define your local Makefile rule as follows:
# clean: .clean
# To augment the clean rule
# clean: .clean
# rm -rf other-stuff
#
#######################################################################################
include $(REPOROOT)/.make.defaults
# Runtime flavor of this project (compared against "python" by the conventions rule below).
# Unless already set, it is the base name of the directory make is run from.
TRANSFORM_RUNTIME?=$(shell basename $$(pwd))
# Unless already set, the image name is <transform name>-<runtime>.
DOCKER_IMAGE_NAME?=$(TRANSFORM_NAME)-$(TRANSFORM_RUNTIME)
# By convention the name of the python file is as follows and contains a main to start the transform
# for this runtime. This file is the one run (with --help) inside the Docker image by the
# test-image-help rules below.
TRANSFORM_RUNTIME_SRC_FILE?=$(TRANSFORM_NAME)_transform_$(TRANSFORM_RUNTIME).py
# NOTE(review): not referenced in the visible part of this file; presumably consumed by
# .make.defaults -- confirm.
TRANSFORM_TEST_FILE?=test/test_$(TRANSFORM_NAME).py
# The following taken from https://stackoverflow.com/a/65243296/45375
# Lists all targets and optional help text found in the target.
# :: rule means we first run the help from the main make.defaults.
# Prints the macros a user may override together with their current values.
# The registry key is deliberately shown as the literal word "secret" instead of
# its real value so that it never ends up in terminal or CI logs.
.PHONY: extra-help
extra-help:
	@# Help: Shows additional make macros that can be overridden/used.
	@echo ""
	@echo Overridable macro values include the following:
	@echo DOCKER - the name of the docker executable to use. DOCKER=$(DOCKER)
	@echo DOCKER_FILE - the name of the docker file to use. DOCKER_FILE=$(DOCKER_FILE)
	@echo DOCKER_REGISTRY_ENDPOINT - the docker registry location to publish images. DOCKER_REGISTRY_ENDPOINT=$(DOCKER_REGISTRY_ENDPOINT)
	@echo DOCKER_HOSTNAME - the name of the docker registry to use. DOCKER_HOSTNAME=$(DOCKER_HOSTNAME)
	@echo DOCKER_NAMESPACE - the name space to use in the registry. DOCKER_NAMESPACE=$(DOCKER_NAMESPACE)
	@echo DOCKER_IMAGE_NAME - the name under the name space where images are published. DOCKER_IMAGE_NAME=$(DOCKER_IMAGE_NAME)
	@echo DOCKER_REGISTRY_USER - the docker user to use. DOCKER_REGISTRY_USER=$(DOCKER_REGISTRY_USER)
	@echo DOCKER_REGISTRY_KEY - the docker registry key to use. DOCKER_REGISTRY_KEY=secret
	@echo PYTHON - the python executable to use. PYTHON=$(PYTHON)
	@echo "TRANSFORM_RUNTIME_SRC_FILE is the base name of the python source file containing the main() (e.g. noop_local_ray.py)"
	@echo ""
# Placeholder setup hook. It is a double-colon rule, so a project Makefile can
# add its own setup recipe without replacing this one.
.transforms.setup::
	@# Help: Do nothing, since nothing to setup by default.
.PHONY: .transforms.setup
# Reports, one line per macro, every required macro that is empty or undefined.
# This is advisory only: each line prints an ERROR message but still exits 0,
# so the target itself never fails (the conventions rule relies on that to keep going).
.PHONY: .transforms.check_required_macros
.transforms.check_required_macros::
	@[ -n "$(TRANSFORM_RUNTIME)" ] || echo ERROR: TRANSFORM_RUNTIME must be defined.
	@[ -n "$(TRANSFORM_NAME)" ] || echo ERROR: TRANSFORM_NAME must be defined.
	@[ -n "$(DOCKER_IMAGE_VERSION)" ] || echo ERROR: DOCKER_IMAGE_VERSION must be defined.
	@[ -n "$(DOCKER_REGISTRY_USER)" ] || echo ERROR: DOCKER_REGISTRY_USER must be defined.
	@[ -n "$(DOCKER_REGISTRY_KEY)" ] || echo ERROR: DOCKER_REGISTRY_KEY must be defined.
	@[ -n "$(DOCKER_HOSTNAME)" ] || echo ERROR: DOCKER_HOSTNAME must be defined.
# Runs the default clean first (prerequisite), then stops the minio server and
# removes the generated directories. The rm lines carry a '-' prefix so that a
# failed removal does not fail the target.
.transforms.clean:: .defaults.clean
	$(MAKE) minio-stop
	-rm -rf output > /dev/null 2>&1 # Created when running local samples
	-rm -rf python-transform > /dev/null 2>&1 # Created when build ray and spark image
	@# Help: Normal cleanups plus stop minio server and rm output dir
.PHONY: .transforms.clean
# Virtual-environment rules, one per runtime. Each simply delegates to the
# matching .defaults.*-lib-src-venv rule.
#
# Notes carried over from the original rules:
#  - Create the local virtual environment, assuming python is already installed and available.
#  - pip is upgraded as that seems to be required by watson_nlp.
#  - wheel is installed because it seems to be required for fasttext install on redhat.
#  - "pip" is used instead of "$(PIP)" because otherwise, if the user has overridden PYTHON,
#    they will end up installing into that PYTHON and NOT the venv.
#  - For spark we do NOT install ../python source as we do for ray, since for now
#    spark implementations do not use the pure python transform. If/when
#    that changes, we can start installing ../python source as is done for ray.
# NOTE(review): the installation steps themselves live in the .defaults.* rules,
# which are not visible here -- confirm these notes still apply.
.PHONY: .transforms.python-venv .transforms.ray-venv .transforms.spark-venv
.transforms.python-venv:: .defaults.python-lib-src-venv
.transforms.ray-venv:: .defaults.ray-lib-src-venv
.transforms.spark-venv:: .defaults.spark-lib-src-venv
# Environment check hook that always succeeds. It is a double-colon rule, so a
# project Makefile can add its own checks.
.transforms.check_env::
	@:
.PHONY: .transforms.check_env
# Image rules, one per runtime. Each delegates to the matching
# .defaults.*-lib-whl-image rule.
.PHONY: .transforms.python-image .transforms.ray-image .transforms.spark-image
.transforms.python-image:: .defaults.python-lib-whl-image
.transforms.ray-image:: .defaults.ray-lib-whl-image
.transforms.spark-image:: .defaults.spark-lib-whl-image
# Build rules, one per runtime: the venv rule followed by the image rule for
# that runtime, expressed as prerequisites.
.PHONY: .transforms.python-build .transforms.ray-build .transforms.spark-build

.transforms.python-build:: .transforms.python-venv .transforms.python-image
	@# Help: Create the venv and build the transform image

.transforms.ray-build:: .transforms.ray-venv .transforms.ray-image
	@# Help: Create the venv and build the transform image

.transforms.spark-build:: .transforms.spark-venv .transforms.spark-image
	@# Help: Create the venv and build the transform image
# Test rules, one per runtime.
# These are assumed to implement the required "test" target used during ci/cd.
# As such, and to avoid running out of disk space during ci/cd, a clean is also
# listed as the last prerequisite of each rule.
.PHONY: .transforms.python-test .transforms.ray-test .transforms.spark-test

.transforms.python-test:: .transforms.test-src .transforms.python-test-image .transforms.clean
	@# Help: Run both source and image level tests.

.transforms.ray-test:: .transforms.test-src .transforms.ray-test-image .transforms.clean
	@# Help: Run both source and image level tests.

.transforms.spark-test:: .transforms.test-src .transforms.spark-test-image .transforms.clean
	@# Help: Run both source and image level tests.
# Source-level tests: the default source tests, then the local sample files,
# then a clean. The runtime-specific venv is assumed to exist already.
.transforms.test-src:: \
        .defaults.test-src \
        .transforms.test-locals \
        .transforms.clean
	@# Help: Run the transform's tests and any '*local' .py files
.PHONY: .transforms.test-src
# Image-level test rules.
#  - python and spark: build the image (prerequisite), run the --help smoke test
#    via a sub-make, then clean. The pytest-in-image step is currently disabled.
#  - ray: the same steps plus the pytest-in-image step, all expressed as prerequisites.
.PHONY: .transforms.python-test-image .transforms.ray-test-image
.PHONY: .transforms.spark-test-image .transforms.test-image-pytest

.transforms.python-test-image:: .transforms.python-image
	$(MAKE) .transforms.test-image-help
#	$(MAKE) .defaults.test-image-pytest
	$(MAKE) .transforms.clean

.transforms.ray-test-image:: .transforms.ray-image .transforms.test-image-help .defaults.test-image-pytest .transforms.clean

.transforms.spark-test-image:: .transforms.spark-image
	$(MAKE) .transforms.test-spark_image-help
#	$(MAKE) .defaults.test-image-pytest
	$(MAKE) .transforms.clean

.transforms.test-image-pytest:: .defaults.test-image-pytest
# Smoke tests for an already-built local image: run the transform's main source
# file with --help inside a throw-away container (--rm).
# The two rules differ only in the interpreter name used inside the image
# (python vs python3).
# Both targets are declared phony; the spark variant previously was not, so a
# file of that name in the project directory would have silently disabled it.
.PHONY: .transforms.test-image-help .transforms.test-spark_image-help

.transforms.test-image-help::
	@# Help: Test an already-built image (use make image) to be sure the --help option works
	$(DOCKER) run -t --rm $(DOCKER_LOCAL_IMAGE) python $(TRANSFORM_RUNTIME_SRC_FILE) --help

.transforms.test-spark_image-help::
	@# Help: Test an already-built image (use make image) to be sure the --help option works
	$(DOCKER) run -t --rm $(DOCKER_LOCAL_IMAGE) python3 $(TRANSFORM_RUNTIME_SRC_FILE) --help
# Public test-locals target and its reusable '.transforms' counterpart; both
# end up at the .defaults.test-locals rule.
.PHONY: test-locals .transforms.test-locals
test-locals:: .transforms.test-locals
.transforms.test-locals:: .defaults.test-locals
# Publish rules, one per runtime. The image is assumed to be built already.
# All three delegate to the same .defaults.publish-image rule.
.PHONY: .transforms.publish-image-python .transforms.publish-image-ray .transforms.publish-image-spark
.transforms.publish-image-python:: .defaults.publish-image
.transforms.publish-image-ray:: .defaults.publish-image
.transforms.publish-image-spark:: .defaults.publish-image
# Prints "<REQ> create <file> in directory <dir>" when no entry named
# CHECK_FILE_NAME is found under CHECK_DIR. Never fails the build.
# Expects CHECK_DIR, CHECK_FILE_NAME and REQ; REQ is read as a shell variable
# ($$REQ) in the recipe.
.transforms-check-exists:
	@found=$$(find $(CHECK_DIR) -name $(CHECK_FILE_NAME)); \
	[ -n "$$found" ] || echo $$REQ create $(CHECK_FILE_NAME) in directory $(CHECK_DIR)
.PHONY: .transforms-check-exists
# Prints "<REQ> remove file <file> from directory <dir>" when an entry named
# CHECK_FILE_NAME is found under CHECK_DIR. Never fails the build.
# Expects CHECK_DIR, CHECK_FILE_NAME and REQ.
# REQ must be written as $$REQ so that the shell, not make, expands it: a
# single '$' makes make expand the (empty) variable R and print "EQ remove file ...".
.PHONY: .transforms-check-not-exists
.transforms-check-not-exists:
	@exists=$$(find $(CHECK_DIR) -name $(CHECK_FILE_NAME)); \
	if [ -n "$$exists" ]; then \
		echo $$REQ remove file $(CHECK_FILE_NAME) from directory $(CHECK_DIR); \
	fi
# Prints a message when the local Makefile has no line starting with
# "<CHECK_TARGET>:". Never fails the build. Expects CHECK_TARGET.
.transforms-check-target:
	@grep -q "^$(CHECK_TARGET):" Makefile || echo Missing makefile target $(CHECK_TARGET)
.PHONY: .transforms-check-target
# Prints a message when CHECK_DIR occupies more than 15 MB according to `du -sm`.
# Never fails the build. Expects CHECK_DIR.
# When du produces no output (for example the directory does not exist) the size
# is treated as 0, so the numeric test stays well-formed instead of emitting a
# shell syntax error; du's own error message is still shown.
.PHONY: .transforms-check-dir-size
.transforms-check-dir-size:
	@mb=$$(du -sm $(CHECK_DIR) | awk '{print $$1}'); \
	if [ "$${mb:-0}" -gt 15 ]; then \
		echo Directory $(CHECK_DIR) is very large; \
	fi
# Loads the image named by DOCKER_REMOTE_IMAGE into the kind cluster named by
# KIND_CLUSTER_NAME.
.transforms.kind-load-image:
	@# Help: Load the transform image to the kind cluster created with make setup.
	kind load docker-image $(DOCKER_REMOTE_IMAGE) --name=$(KIND_CLUSTER_NAME)
.PHONY: .transforms.kind-load-image
# Checks the project layout against the transform conventions and prints a
# recommendation for each deviation. Every check is a sub-make of one of the
# .transforms-check-* helper rules, parameterized through CHECK_DIR,
# CHECK_FILE_NAME, CHECK_TARGET and REQ (the word that starts the message,
# Must or Should).
#  1. Runtime-specific source checks: the python runtime needs the base
#     transform, test and local sample files; other runtimes must not have a
#     <name>.py in src.
#  2. Checks common to all runtimes: runtime-suffixed test and local files,
#     no output under test-data, a .dockerignore, and the size of test-data.
#  3. The local Makefile must define each of the standard targets.
#  4. The transform name should not contain 'annotator'.
.PHONY: conventions
conventions: .transforms.check_required_macros
	@# Help: Check transform project conventions and make recommendations, if needed.
	@echo "Begin checking transform conventions for $(TRANSFORM_RUNTIME) runtime project. Recommendations/issues, if any, follow..."
	@case "$(TRANSFORM_RUNTIME)" in \
	python) \
		$(MAKE) CHECK_DIR=src CHECK_FILE_NAME=$(TRANSFORM_NAME)_transform.py REQ=Must .transforms-check-exists; \
		$(MAKE) CHECK_DIR=test CHECK_FILE_NAME=test_$(TRANSFORM_NAME).py REQ=Must .transforms-check-exists; \
		$(MAKE) CHECK_DIR=src CHECK_FILE_NAME=$(TRANSFORM_NAME)_local.py REQ=Should .transforms-check-exists \
		;; \
	*) \
		$(MAKE) CHECK_DIR=src CHECK_FILE_NAME=$(TRANSFORM_NAME).py REQ=Must .transforms-check-not-exists \
		;; \
	esac
	@$(MAKE) CHECK_DIR=test CHECK_FILE_NAME=test_$(TRANSFORM_NAME)_$(TRANSFORM_RUNTIME).py REQ=Must .transforms-check-exists
	@$(MAKE) CHECK_DIR=src CHECK_FILE_NAME=$(TRANSFORM_NAME)_local_$(TRANSFORM_RUNTIME).py REQ=Should .transforms-check-exists
	@$(MAKE) CHECK_DIR=test-data CHECK_FILE_NAME=output REQ=Must .transforms-check-not-exists
	@$(MAKE) CHECK_DIR=. CHECK_FILE_NAME=.dockerignore REQ=Should .transforms-check-exists
	@$(MAKE) CHECK_DIR=test-data .transforms-check-dir-size
	@$(MAKE) CHECK_TARGET=build .transforms-check-target
	@$(MAKE) CHECK_TARGET=clean .transforms-check-target
	@$(MAKE) CHECK_TARGET=image .transforms-check-target
	@$(MAKE) CHECK_TARGET=publish .transforms-check-target
	@$(MAKE) CHECK_TARGET=setup .transforms-check-target
	@$(MAKE) CHECK_TARGET=test .transforms-check-target
	@$(MAKE) CHECK_TARGET=test-src .transforms-check-target
	@$(MAKE) CHECK_TARGET=test-image .transforms-check-target
	@$(MAKE) CHECK_TARGET=set-versions .transforms-check-target
	@$(MAKE) CHECK_TARGET=venv .transforms-check-target
	@if echo $(TRANSFORM_NAME) | grep -q annotator; then \
		echo "Recommend not using 'annotator' in your transform name"; \
	fi
	@echo Done checking transform project conventions.
# Local sample runners for the python and ray runtimes. Each hands its
# runtime-specific sample file name to .transforms.run-src-file via RUN_FILE.
.PHONY: .transforms.run-local-python-sample .transforms.run-local-ray-sample

.transforms.run-local-python-sample:
	@# Help: Run the $(TRANSFORM_NAME)_local_python.py file (if it exists).
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_local_python.py .transforms.run-src-file

.transforms.run-local-ray-sample:
	@# Help: Run the $(TRANSFORM_NAME)_local_ray.py file (if it exists).
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_local_ray.py .transforms.run-src-file
# Runs the S3 ray sample: first verifies that minio is running, then runs the
# sample file, and finally prints a reminder about stopping the minio server.
.transforms.run-s3-ray-sample:
	$(MAKE) .defaults.minio.verify-running
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_s3_ray.py .transforms.run-src-file
	@# Help: Run $(TRANSFORM_NAME)_s3_ray.py file (if it exists). Assumes minio has beens started.
	@printf '\n%s\n\n' "You may want to stop the minio server now (see make help)"
.PHONY: .transforms.run-s3-ray-sample
# Runs the runtime-neutral local sample, $(TRANSFORM_NAME)_local.py, through
# .transforms.run-src-file.
.transforms.run-local-sample:
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_local.py .transforms.run-src-file
.PHONY: .transforms.run-local-sample
# Command-line sample runners, one per runtime. Each runs the runtime's
# transform main through .transforms.run-src-file, passing RUN_ARGS that point
# the transform at ../test-data/input and ../output.
#  - ray additionally passes --run_locally True
#  - spark additionally passes --spark_local_config_filepath ../config/spark_profile_local.yml
.PHONY: .transforms.run-cli-python-sample .transforms.run-cli-ray-sample .transforms.run-cli-spark-sample

.transforms.run-cli-python-sample:
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform_python.py \
	RUN_ARGS=" --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" " .transforms.run-src-file

.transforms.run-cli-ray-sample:
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform_ray.py \
	RUN_ARGS="--run_locally True --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" " .transforms.run-src-file

.transforms.run-cli-spark-sample:
	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform_spark.py \
	RUN_ARGS="--data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" --spark_local_config_filepath ../config/spark_profile_local.yml " .transforms.run-src-file
# Thin alias for .defaults.run-src-file. Expects RUN_FILE; the run-cli sample
# rules in this file also pass RUN_ARGS.
.transforms.run-src-file: .defaults.run-src-file
.PHONY: .transforms.run-src-file
# Loads this transform's ray test-data/input files into minio.
# The transform directory is located by name under $(REPOROOT)/transforms and
# its ray/test-data/input folder is handed to .defaults.minio.load-test-data
# as MINIO_SRC, with $(TRANSFORM_NAME)/ as MINIO_DEST.
# The .PHONY declaration now names this target; it previously named the
# non-existent .transforms.load-minio, leaving this rule undeclared.
.PHONY: .transforms.minio-load
.transforms.minio-load::
	@# Help: Install the test-data/input files into minio for $(TRANSFORM_NAME)
	dir=$$(find $(REPOROOT)/transforms -type d -name $(TRANSFORM_NAME)); \
	$(MAKE) MINIO_SRC=$$dir/ray/test-data/input MINIO_DEST=$(TRANSFORM_NAME)/ .defaults.minio.load-test-data
# Minio server helpers.
#  .minio-start starts the server and then loads this transform's test data.
#  minio-stop   stops the server (also invoked by .transforms.clean).
.PHONY: .minio-start minio-stop

.minio-start:
	$(MAKE) .defaults.minio.start
	$(MAKE) .transforms.minio-load
	@# Help: Start and load the minio server with $(TRANSFORM_NAME) test data

minio-stop:
	$(MAKE) .defaults.minio.stop
# Make sure the given image is found in the local image listing.
# Expects IMAGE_NAME_TO_VERIFY.
# The name is matched as a fixed string against the output of `$(DOCKER) images`;
# the target fails (exit 1) with a message when nothing matches.
# `grep -F` is used instead of `fgrep`, which is obsolescent and makes recent
# GNU grep print a warning on every call.
# NOTE(review): check_defined is defined outside this file; presumably it aborts
# when the macro is unset -- confirm.
.PHONY: .defaults.verify-image-availability
.defaults.verify-image-availability:
	$(call check_defined, IMAGE_NAME_TO_VERIFY)
	@z=$$($(DOCKER) images | grep -F $(IMAGE_NAME_TO_VERIFY)); \
	if [ -z "$$z" ]; then \
		echo Image $(IMAGE_NAME_TO_VERIFY) is not available locally.; \
		exit 1; \
	fi
# Update the toml file to the version from .make.version, which is expected to be set into DOCKER_IMAGE_VERSION.
# Note that the image and distribution versions will be the same.
# Requires the version number of the Python image to be set as TRANSFORM_PYTHON_VERSION when called.
# In addition, the pinned version of the dpk ... transform-python dependency is
# rewritten to TRANSFORM_PYTHON_VERSION in pyproject.toml and requirements.txt,
# when those files exist.
# Each rewrite goes to a temporary file that replaces the original only if sed
# succeeded (&&), so a failed sed can no longer overwrite the file with
# truncated output.
.PHONY: .transforms.set-versions
.transforms.set-versions:
	$(call check_defined, TRANSFORM_PYTHON_VERSION)
	$(MAKE) .defaults.update-toml
	if [ -e pyproject.toml ]; then \
		sed -e 's/\("dpk[_-].*transform[_-]python[=<>~][=]\).*"/\1$(TRANSFORM_PYTHON_VERSION)"/' pyproject.toml > tt.toml \
			&& mv tt.toml pyproject.toml; \
	fi
	if [ -e requirements.txt ]; then \
		sed -e 's/\(dpk[_-].*transform[_-]python[=<>~][=]\).*/\1$(TRANSFORM_PYTHON_VERSION)/' requirements.txt > tt.txt \
			&& mv tt.txt requirements.txt; \
	fi