diff --git a/.dockerignore b/.dockerignore
index 69589d1..7f98667 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,5 +1,6 @@
 *
 !bin/profile-examples.sh
+!bin/profile-example-memory-usage.sh
 !jupyter_notebook_config.py
 !LightGBM/lib_lightgbm.so
 !LightGBM/LICENSE
diff --git a/Dockerfile-profiling b/Dockerfile-profiling
index fd9f1e9..3cb3411 100644
--- a/Dockerfile-profiling
+++ b/Dockerfile-profiling
@@ -6,7 +6,9 @@ FROM ${BASE_IMAGE}
 RUN pip install --no-cache-dir \
     memray \
     pytest \
+    pytest-memray \
     pytest-profiling \
     snakeviz
 
 COPY bin/profile-examples.sh /usr/local/bin/profile-examples.sh
+COPY bin/profile-example-memory-usage.sh /usr/local/bin/profile-example-memory-usage.sh
diff --git a/Makefile b/Makefile
index 13f84de..56ea34e 100644
--- a/Makefile
+++ b/Makefile
@@ -164,13 +164,27 @@ profiling-image: cluster-image
 			echo "image '${PROFILING_IMAGE}' already exists. To force rebuilding, run 'make profiling-image -e FORCE_REBUILD_PROFILING_IMAGE=1'."; \
 			exit 0; \
 		fi; \
-	fi;
+	fi && \
 	docker build \
 		-t ${PROFILING_IMAGE} \
 		--build-arg BASE_IMAGE=${CLUSTER_IMAGE} \
 		-f Dockerfile-profiling \
 		.
 
+.PHONY: profile-memory-usage
+profile-memory-usage: profiling-image
+	docker run \
+		--rm \
+		--env LIGHTGBM_HOME=/opt/LightGBM \
+		--env PROFILING_OUTPUT_DIR=/profiling-output/memory-usage \
+		-v $$(pwd)/profiling-output:/profiling-output \
+		-v $$(pwd)/LightGBM:/opt/LightGBM \
+		--workdir=/opt/LightGBM \
+		--entrypoint="" \
+		-it ${PROFILING_IMAGE} \
+		/bin/bash -cex \
+			'/bin/bash /usr/local/bin/profile-example-memory-usage.sh'
+
 # https://docs.amazonaws.cn/en_us/AmazonECR/latest/public/docker-push-ecr-image.html
 .PHONY: push-image
 push-image: create-repo
diff --git a/README.md b/README.md
index ac57bf6..f688896 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,23 @@ make profile
 
 Then navigate to `http://0.0.0.0:8080/snakeviz/%2Fprofiling-output` in your web browser.
 
+### memory profiling
+
+To summarize memory allocations in typical uses of LightGBM, and to attribute those memory allocations to particular codepaths, you can run its examples under `memray` ([link](https://github.com/bloomberg/memray)).
+
+```shell
+make profile-memory-usage
+```
+
+That will generate a bunch of HTML files.
+View them in your browser by running the following, then navigating to `localhost:1234`.
+
+```shell
+python -m http.server \
+    --directory ./profiling-output/memory-usage \
+    1234
+```
+
 ## Useful Links
 
 * https://github.com/microsoft/LightGBM/pull/3515
diff --git a/bin/profile-example-memory-usage.sh b/bin/profile-example-memory-usage.sh
new file mode 100644
index 0000000..fd29656
--- /dev/null
+++ b/bin/profile-example-memory-usage.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# [description]
+#
+#     Profile memory usage of all of LightGBM's Python examples, using memray.
+#
+# [required environment variables]
+#
+#     LIGHTGBM_HOME
+#         Directory searched (under examples/python-guide) for '*.py' scripts.
+#     PROFILING_OUTPUT_DIR
+#         Directory that receives the HTML reports. Raw memray captures are
+#         written to its 'bin' subdirectory.
+
+set -e -u -o pipefail
+
+# Fail fast with a readable message when a variable is unset or empty. An empty
+# PROFILING_OUTPUT_DIR would otherwise make the script write to '/bin'.
+: "${LIGHTGBM_HOME:?must be set to the LightGBM source directory}"
+: "${PROFILING_OUTPUT_DIR:?must be set to the directory for profiling results}"
+
+capture_dir="${PROFILING_OUTPUT_DIR}/bin"
+
+echo "profiling examples"
+mkdir -p "${capture_dir}"
+
+# 'find -print0' + 'read -d ""' keeps each path intact even if it contains
+# whitespace or glob characters, unlike word-splitting '$(find ...)'. The list
+# arrives on file descriptor 3 so the profiled scripts keep the original stdin.
+while IFS= read -r -d '' -u 3 py_script; do
+    base_filename=$(basename "${py_script}")
+    # Strip only the trailing '.py'; a global 's/\.py/.../g' substitution would
+    # also rewrite a '.py' occurring earlier in the file name.
+    stem="${base_filename%.py}"
+    capture_file="${capture_dir}/${stem}.bin"
+    echo " - ${base_filename}"
+    # --force: overwrite the capture left by a previous run; without it memray
+    # refuses to reuse the path and the reports below would describe stale data.
+    # '2>&1 > /dev/null' discards only the example's stdout: stderr is pointed at
+    # the script's stdout first, so diagnostics stay visible.
+    # A failing example is reported but does not stop the remaining examples.
+    memray run \
+        --force \
+        -o "${capture_file}" \
+        "${py_script}" 2>&1 > /dev/null \
+    || echo "   WARNING: 'memray run' exited non-zero for ${base_filename}" >&2
+    memray table \
+        -o "${PROFILING_OUTPUT_DIR}/${stem}-table.html" \
+        --force \
+        "${capture_file}"
+    memray table \
+        -o "${PROFILING_OUTPUT_DIR}/${stem}-leak-table.html" \
+        --force \
+        --leaks \
+        "${capture_file}"
+    memray flamegraph \
+        -o "${PROFILING_OUTPUT_DIR}/${stem}-flamegraph.html" \
+        --force \
+        "${capture_file}"
+done 3< <(find "${LIGHTGBM_HOME}/examples/python-guide" -name '*.py' -print0)
+echo "Done profiling examples. See '${PROFILING_OUTPUT_DIR}' for results."