Skip to content

Commit

Permalink
Refactored the docker build script to only include the gatk bund…
Browse files Browse the repository at this point in the history
…le in order to shrink the docker image size (#4955)

* Refactored the docker image to contain only the runtime jars and other necessary files for space concerns
  • Loading branch information
jamesemery authored and droazen committed Jul 6, 2018
1 parent 4521d32 commit d6a7fcd
Show file tree
Hide file tree
Showing 11 changed files with 366 additions and 110 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# These are files that for whatever reason we don't want to include in our distribution docker images
src/test/resources
src/test/resources/*
.git/*
11 changes: 6 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ env:
#gradle needs this
- TERM=dumb
#limit gradle jvm memory and disable daemon
- GRADLE_OPTS="-Xmx1024m -Dorg.gradle.daemon=false"
- GRADLE_OPTS="-Xmx2048m -Dorg.gradle.daemon=false"
#google cloud stuff
- CLOUDSDK_CORE_DISABLE_PROMPTS=1
- GCLOUD_HOME=$HOME/gcloud/google-cloud-sdk/bin
Expand Down Expand Up @@ -57,7 +57,7 @@ cache:
before_install:
- REPORT_PATH=${TRAVIS_BRANCH}_${TRAVIS_JOB_NUMBER};
- if [[ $TRAVIS_SECURE_ENV_VARS == true && $TRAVIS_EVENT_TYPE != cron ]]; then
echo "Test report will be written to https://storage.googleapis.com${HELLBENDER_TEST_LOGS}${REPORT_PATH}/tests/test/index.html";
echo "Test report will be written to https://storage.googleapis.com$HELLBENDER_TEST_LOGS$REPORT_PATH/tests/test/index.html";
fi
#setup google cloud and github authentication
- if [ $TRAVIS_SECURE_ENV_VARS == true ]; then
Expand Down Expand Up @@ -134,18 +134,19 @@ script:
elif [[ $TEST_DOCKER == true ]]; then
echo "Building docker image and running appropriate tests..." ;
if [ ${TRAVIS_PULL_REQUEST} != false ]; then
sudo bash build_docker.sh -e FETCH_HEAD -s -u -d $PWD/temp_staging/ -t ${TRAVIS_PULL_REQUEST};
sudo bash build_docker.sh -e FETCH_HEAD -s -u -t ${TRAVIS_PULL_REQUEST};
DOCKER_TAG=FETCH_HEAD;
else
echo ${TRAVIS_COMMIT};
sudo bash build_docker.sh -e ${TRAVIS_COMMIT} -s -u -d $PWD/temp_staging/;
sudo bash build_docker.sh -e ${TRAVIS_COMMIT} -s -u;
DOCKER_TAG=$TRAVIS_COMMIT;
fi;
sudo docker images;
echo ${TEST_TYPE};
sudo mkdir -p build/reports/;
sudo chmod -R a+w build/reports/;
sudo docker run -v $(pwd)/src/test/resources:/testdata -v $(pwd)/build/reports/:/gatk/build/reports/ --rm -e "TEST_VERBOSITY=minimal" -e "TEST_TYPE=${TEST_TYPE}" -t broadinstitute/gatk:${DOCKER_TAG} bash --init-file /gatk/gatkenv.rc /root/run_unit_tests.sh;
cp scripts/docker/dockertest.gradle .;
sudo docker run -v $(pwd):/gatkCloneMountPoint:cached -v $(pwd)/testJars:/jars:cached --rm -e "TEST_VERBOSITY=minimal" -e "TEST_TYPE=${TEST_TYPE}" -t broadinstitute/gatk:${DOCKER_TAG} bash --init-file /gatk/gatkenv.rc /root/run_unit_tests.sh && sudo mkdir build/reports/tests/test && sudo cp -rp build/reports/tests/testOnPackagedReleaseJar/* build/reports/tests/test && sudo rm -r build/reports/tests/testOnPackagedReleaseJar;
else
./gatk PrintReads -I src/test/resources/NA12878.chr17_69k_70k.dictFix.bam -O output.bam;
travis_wait 50 ./gradlew jacocoTestReport;
Expand Down
45 changes: 29 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,41 @@
# Using OpenJDK 8
FROM broadinstitute/gatk:gatkbase-1.2.3
ARG DRELEASE
FROM broadinstitute/gatk:gatkbase-2.0.0

ADD . /gatk
# Location of the unzipped gatk bundle files
ARG ZIPPATH

ADD $ZIPPATH /gatk

WORKDIR /gatk
RUN /gatk/gradlew clean compileTestJava sparkJar localJar condaEnvironmentDefinition -Drelease=$DRELEASE

#Setup linked jars that may be needed for running gatk
RUN ln -s $( find /gatk -name "gatk*local.jar" ) gatk.jar
RUN ln -s $( find /gatk -name "gatk*local.jar" ) /root/gatk.jar
RUN ln -s $( find /gatk -name "gatk*spark.jar" ) gatk-spark.jar

WORKDIR /root

# Make sure we can see a help message
RUN ln -sFv /gatk/build/libs/gatk.jar
# Make sure we can see a help message
RUN java -jar gatk.jar -h
RUN mkdir /gatkCloneMountPoint
RUN mkdir /jars
RUN mkdir .gradle

#Setup test data
WORKDIR /gatk
# Create link to where test data is expected
RUN ln -s /testdata src/test/resources

# Create a simple unit test runner
ENV CI true
RUN echo "source activate gatk" > /root/run_unit_tests.sh && \
echo "cd /gatk/ && ./gradlew jacocoTestReport" >> /root/run_unit_tests.sh
echo "export TEST_JAR=\$( find /jars -name \"gatk*test.jar\" )" >> /root/run_unit_tests.sh && \
echo "export TEST_DEPENDENCY_JAR=\$( find /jars -name \"gatk*testDependencies.jar\" )" >> /root/run_unit_tests.sh && \
echo "export GATK_JAR=$( find /gatk -name "gatk*local.jar" )" >> /root/run_unit_tests.sh && \
echo "cp -rp /gatkCloneMountPoint/src/main/java/* /gatk/srcdir" >> /root/run_unit_tests.sh && \
echo "export SOURCE_DIR=/gatk/srcdir" >> /root/run_unit_tests.sh && \
# Append (>>) these exports to the generated test script; without the redirect, echo only prints its arguments during the image build.
echo "export GRADLE_OPTS=\"-Xmx1024m -Dorg.gradle.daemon=false\"" >> /root/run_unit_tests.sh && \
echo "export CP_DIR=/gatk/testClasses" >> /root/run_unit_tests.sh && \
echo "ln -s /gatkCloneMountPoint/src/ /gatkCloneMountPoint/scripts/docker/src" >> /root/run_unit_tests.sh && \
echo "ln -s /gatkCloneMountPoint/build/ /gatkCloneMountPoint/scripts/docker/build" >> /root/run_unit_tests.sh && \
echo "cd /gatk/ && /gatkCloneMountPoint/gradlew -b /gatkCloneMountPoint/dockertest.gradle testOnPackagedReleaseJar jacocoTestReportOnPackagedReleaseJar -a -p /gatkCloneMountPoint" >> /root/run_unit_tests.sh

WORKDIR /root
RUN cp -r /root/run_unit_tests.sh /gatk
Expand All @@ -39,16 +53,15 @@ RUN mkdir $DOWNLOAD_DIR && \
test "`md5sum $DOWNLOAD_DIR/miniconda.sh | awk -v FS=' ' '{print $1}'` = $CONDA_MD5" && \
bash $DOWNLOAD_DIR/miniconda.sh -p $CONDA_PATH -b && \
rm $DOWNLOAD_DIR/miniconda.sh
ENV PATH $CONDA_PATH/envs/gatk/bin:$CONDA_PATH/bin:$PATH
WORKDIR /gatk/build
RUN conda env create -n gatk -f gatkcondaenv.yml && \
echo "source activate gatk" >> /gatk/gatkenv.rc
WORKDIR /gatk
ENV PATH $CONDA_PATH/envs/gatk/bin:$CONDA_PATH/bin:$PATH
# Create the "gatk" conda environment from the bundled definition, append its
# activation line to /gatk/gatkenv.rc, then remove conda and pip caches.
# `--all` is spelled out: the single-dash `-all` was parsed as bundled short flags.
RUN conda env create -n gatk -f /gatk/gatkcondaenv.yml && \
echo "source activate gatk" >> /gatk/gatkenv.rc && \
conda clean -y --all && \
rm -rf /root/.cache/pip

CMD ["bash", "--init-file", "/gatk/gatkenv.rc"]

# End GATK Python environment

WORKDIR /gatk

ENV PATH /gatk:$PATH
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ echo "source <PATH_TO>/gatk-completion.sh" >> ~/.bashrc
* Test report is in `build/reports/tests/test/index.html`.
* What will happen depends on the value of the `TEST_TYPE` environment variable:
* unset or any other value : run non-cloud unit and integration tests, this is the default
* `cloud`, `unit`, `integration`, `spark` : run only the cloud, unit, integration, or Spark tests
* `cloud`, `unit`, `integration`, `spark`, `python` : run only the cloud, unit, integration, Spark, or python tests
* `all` : run the entire test suite
* Cloud tests require being logged into `gcloud` and authenticated with a project that has access
to the cloud test data. They also require setting several certain environment variables.
Expand Down
128 changes: 85 additions & 43 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -174,19 +174,6 @@ configurations.all {
all*.exclude group: 'junit', module: 'junit'
}


jacocoTestReport {
dependsOn test
group = "Reporting"
description = "Generate Jacoco coverage reports after running tests."
additionalSourceDirs = files(sourceSets.main.allJava.srcDirs)

reports {
xml.enabled = true
html.enabled = true
}
}

//NOTE: we ignore contracts for now
compileJava {
options.compilerArgs = ['-proc:none', '-Xlint:all', '-Werror', '-Xdiags:verbose']
Expand Down Expand Up @@ -379,7 +366,6 @@ version = (isRelease ? gitVersion() : gitVersion() + "-SNAPSHOT").replaceAll(".d
logger.info("build for version:" + version)
group = 'org.broadinstitute'


tasks.withType(Jar) {
manifest {
attributes 'Implementation-Title': 'The Genome Analysis Toolkit (GATK)',
Expand All @@ -390,7 +376,33 @@ tasks.withType(Jar) {
}
}

test {
// Wrapper task: sets the Gradle version (3.1) for the generated gradle wrapper.
task wrapper(type: Wrapper) {
gradleVersion = '3.1'
}

// Settings applied to every ShadowJar task: include the main source set output,
// name the archive "<project name>-package", merge service files, relocate
// com.google.common under a hellbender-specific package, and enable zip64.
tasks.withType(ShadowJar) {
from(project.sourceSets.main.output)
baseName = project.name + '-package'
mergeServiceFiles()
relocate 'com.google.common', 'org.broadinstitute.hellbender.relocated.com.google.common'
zip64 true
exclude 'log4j.properties' // from adam jar as it clashes with hellbender's log4j2.xml
exclude '**/*.SF' // these are Manifest signature files and
exclude '**/*.RSA' // keys which may accidentally be imported from other signed projects and then fail at runtime

// Suggested by the akka devs to make sure that we do not get the spark configuration error.
// http://doc.akka.io/docs/akka/snapshot/general/configuration.html#When_using_JarJar__OneJar__Assembly_or_any_jar-bundler
transform(com.github.jengelman.gradle.plugins.shadow.transformers.AppendingTransformer) {
resource = 'reference.conf'
}
}

//============================================================================================================================
// WARNING
//============================================================================================================================
// This is duplicated code with the build script in scripts/docker/dockertest.gradle. If you alter the tests in any way
// please make sure to alter this test method there as well.
tasks.withType(Test) {
outputs.upToDateWhen { false } //tests will never be "up to date" so you can always rerun them
String TEST_VERBOSITY = "$System.env.TEST_VERBOSITY"

Expand All @@ -401,7 +413,11 @@ test {
* anything else : run the non-cloud tests
*/
String TEST_TYPE = "$System.env.TEST_TYPE"

//============================================================================================================================
// WARNING
//============================================================================================================================
// This is duplicated code with the build script in scripts/docker/dockertest.gradle. If you alter the tests in any way
// please make sure to alter this test method there as well.
useTestNG {
if (TEST_TYPE == "cloud") {
// run only the cloud tests
Expand Down Expand Up @@ -473,28 +489,11 @@ test {
}
}
}
}


task wrapper(type: Wrapper) {
gradleVersion = '3.1'
}

tasks.withType(ShadowJar) {
from(project.sourceSets.main.output)
baseName = project.name + '-package'
mergeServiceFiles()
relocate 'com.google.common', 'org.broadinstitute.hellbender.relocated.com.google.common'
zip64 true
exclude 'log4j.properties' // from adam jar as it clashes with hellbender's log4j2.xml
exclude '**/*.SF' // these are Manifest signature files and
exclude '**/*.RSA' // keys which may accidentally be imported from other signed projects and then fail at runtime

// Suggested by the akka devs to make sure that we do not get the spark configuration error.
// http://doc.akka.io/docs/akka/snapshot/general/configuration.html#When_using_JarJar__OneJar__Assembly_or_any_jar-bundler
transform(com.github.jengelman.gradle.plugins.shadow.transformers.AppendingTransformer) {
resource = 'reference.conf'
}
//============================================================================================================================
// WARNING
//============================================================================================================================
// This is duplicated code with the build script in scripts/docker/dockertest.gradle. If you alter the tests in any way
// please make sure to alter this test method there as well.
}

shadowJar {
Expand Down Expand Up @@ -524,7 +523,27 @@ task sparkJar(type: ShadowJar) {
}
}

task bundle(type: Zip) {
// A jar that only contains the test classes and resources (to be extracted for testing)
// Its contents come from sourceSets.test.output and the archive carries the "test" classifier.
task shadowTestClassJar(type: ShadowJar){
group = "Shadow"
from sourceSets.test.output
description = "Create a jar that packages the compiled test classes"
classifier = "test"
}

// A minimal jar that only contains the extra dependencies needed for running the tests
// The archive carries the "testDependencies" classifier.
task shadowTestJar(type: ShadowJar){
group = "Shadow"
description = " A minimal jar that only contains the extra dependencies needed for running the tests that arent packaged in the main shadow jar"
from {
// Take the entries of the testRuntime configuration that are not in runtime.
// Directories are added as-is, ".jar" files are expanded with zipTree, and any other file is added unchanged.
(project.configurations.testRuntime - project.configurations.runtime ).collect {
it.isDirectory() ? it : it.getName().endsWith(".jar") ? zipTree(it) : it
}
}
classifier = "testDependencies"
}

task collectBundleIntoDir(type: Copy) {
dependsOn shadowJar, sparkJar, 'condaEnvironmentDefinition', 'gatkTabComplete', 'gatkDoc'

doFirst {
Expand All @@ -534,10 +553,6 @@ task bundle(type: Zip) {
assert file("src/main/resources/org/broadinstitute/hellbender/utils/config/GATKConfig.properties").exists()
}

baseName = project.name + "-" + project.version
destinationDir file("$buildDir")
archiveName baseName + ".zip"

from(shadowJar.archivePath)
from(sparkJar.archivePath)
from("gatk")
Expand All @@ -552,13 +567,40 @@ task bundle(type: Zip) {
from("$buildDir/$pythonPackageArchiveName")
from("$buildDir/$gatkCondaYML")
from("$buildDir/$gatkCondaIntelYML")
from("scripts/sv", { into("scripts/sv") })
from("scripts/cnv_wdl/", { into("scripts/cnv_wdl") })
from("scripts/mutect2_wdl/", { into("scripts/mutect2_wdl") })
into "$buildDir/bundle-files-collected"
}

// Zips the files gathered by collectBundleIntoDir into "<project name>-<version>.zip"
// in the build directory.
task bundle(type: Zip) {
dependsOn collectBundleIntoDir

baseName = project.name + "-" + project.version
destinationDir file("$buildDir")
archiveName baseName + ".zip"

// Everything collected under bundle-files-collected is placed inside a top-level
// directory named after baseName within the archive.
from("$buildDir/bundle-files-collected")
into(baseName)

// Report where the archive was written once the task has run.
doLast {
logger.lifecycle("Created GATK distribution in ${destinationDir}/${archiveName}")
}
}

// Configures the Jacoco coverage report: it depends on the test task, adds the main
// Java source directories as additional sources, and enables XML and HTML output.
jacocoTestReport {
dependsOn test

group = "Reporting"
description = "Generate Jacoco coverage reports after running tests."
additionalSourceDirs = files(sourceSets.main.allJava.srcDirs)

reports {
xml.enabled = true
html.enabled = true
}
}

task condaStandardEnvironmentDefinition(type: Copy) {
from "scripts"
into buildDir
Expand Down
25 changes: 21 additions & 4 deletions build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ Optional arguments: \n \
exit 1
fi


# -z is like "not -n"
if [ -z ${IS_NOT_LATEST} ] && [ -n "${IS_HASH}" ] && [ -n "${IS_PUSH}" ]; then
echo -e "\n##################"
Expand Down Expand Up @@ -113,8 +112,22 @@ if [ -n "${IS_PUSH}" ]; then
else
RELEASE=false
fi
# Build the bundle directory plus the two test jars outside of docker.
./gradlew clean collectBundleIntoDir shadowTestClassJar shadowTestJar -Drelease=$RELEASE
# Move the collected bundle files to ./unzippedJar, the path passed to docker build as ZIPPATH.
ZIPPATHGATK=$( find ./build -name "*bundle-files-collected" )
mv ${ZIPPATHGATK} ./unzippedJar
# Unpack the python package archive (flattened with -j) into the bundle's scripts directory.
ZIPPATHPYTHON=$( find ./unzippedJar -name "gatkPython*.zip" )
unzip -o -j ${ZIPPATHPYTHON} -d ./unzippedJar/scripts

# Stage the test jars next to the clone. -p keeps this step from failing when the
# directory is left over from a previous run.
mkdir -p ${STAGING_ABSOLUTE_PATH:-.}/testJars
mv $( find ./build/libs/ -name "gatk*test.jar" ) ${STAGING_ABSOLUTE_PATH:-.}/testJars
mv $( find ./build/libs/ -name "gatk*testDependencies.jar" ) ${STAGING_ABSOLUTE_PATH:-.}/testJars

echo "Building image to tag ${REPO_PRJ}:${GITHUB_TAG}..."
docker build -t ${REPO_PRJ}:${GITHUB_TAG} --build-arg DRELEASE=$RELEASE .
if [ -n "${IS_PUSH}" ]; then
docker build -t ${REPO_PRJ}:${GITHUB_TAG} --squash --build-arg ZIPPATH=./unzippedJar .
else
docker build -t ${REPO_PRJ}:${GITHUB_TAG} --build-arg ZIPPATH=./unzippedJar .
fi

if [ -z "${IS_NOT_RUN_UNIT_TESTS}" ] ; then

Expand All @@ -128,9 +141,13 @@ if [ -z "${IS_NOT_RUN_UNIT_TESTS}" ] ; then
git lfs pull
chmod -R a+w ${STAGING_ABSOLUTE_PATH}/src/test/resources

echo docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}/src/test/resources:/testdata -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}/src/test/resources:/testdata -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
# Keep a copy of build.gradle so it can be put back after the test run.
cp build.gradle build.gradle.backup
# Copy the docker test build script from the checkout. The path is relative to the
# repository root, matching the copy done in .travis.yml, rather than the
# filesystem root.
cp scripts/docker/dockertest.gradle .

# Print the command, then run the tests with the clone mounted at /gatkCloneMountPoint
# and the staged test jars mounted at /jars.
echo docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}:/gatkCloneMountPoint -v ${STAGING_ABSOLUTE_PATH}/testJars:/jars -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
docker run ${REMOVE_CONTAINER_STRING} -v ${STAGING_ABSOLUTE_PATH}:/gatkCloneMountPoint -v ${STAGING_ABSOLUTE_PATH}/testJars:/jars -t ${REPO_PRJ}:${GITHUB_TAG} bash /root/run_unit_tests.sh
echo " Unit tests passed..."
mv build.gradle.backup build.gradle
fi

## Push
Expand Down
Loading

0 comments on commit d6a7fcd

Please sign in to comment.