Skip to content

Commit

Permalink
use Airlock for PyPi packages (GoogleCloudPlatform#1981)
Browse files Browse the repository at this point in the history
* use Airlock for PyPi packages

Signed-off-by: Jeffrey Kinard <[email protected]>

* remove java caching logic

Signed-off-by: Jeffrey Kinard <[email protected]>

---------

Signed-off-by: Jeffrey Kinard <[email protected]>
  • Loading branch information
Polber authored Nov 1, 2024
1 parent 515e05f commit 5cc6570
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 47 deletions.
6 changes: 6 additions & 0 deletions .mvn/settings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<!-- Also update repo values in plugins/core-plugin/src/main/java/com/google/cloud/teleport/plugin/DockerfileGenerator.java -->
<repositories>
<repository>
<id>central</id>
Expand All @@ -26,6 +27,11 @@
</snapshots>
</repository>
</repositories>
<properties>
<airlockJavaRepo>ah-3p-staging-maven</airlockJavaRepo>
<airlockPythonRepo>python-3p</airlockPythonRepo>
<saSecretName>default-service-account-key-file</saSecretName>
</properties>
</profile>
</profiles>
<mirrors>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ public class DockerfileGenerator {
public static final String JAVA_LAUNCHER_ENTRYPOINT =
"/opt/google/dataflow/java_template_launcher";

// Keep pythonVersion below in sync with version in image
// Keep pythonVersion below in sync with version in base image
public static final String PYTHON_VERSION = "3.11";
public static final String DEFAULT_WORKING_DIRECTORY = "/template";

private static final String SA_SECRET_NAME_KEY = "saSecretName";
private static final String AIRLOCK_PYTHON_REPO_KEY = "airlockPythonRepo";

private static final Logger LOG = Logger.getLogger(DockerfileGenerator.class.getName());

protected Map<String, Object> parameters;
Expand Down Expand Up @@ -121,6 +124,8 @@ public void generate() throws IOException, TemplateException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
OutputStreamWriter writer = new OutputStreamWriter(baos);

configureAirlock(parameters);

try {
template.process(parameters, writer);
writer.flush();
Expand All @@ -135,6 +140,41 @@ public void generate() throws IOException, TemplateException {
}
}

/**
 * Populates the {@code airlockConfig} template parameter with Dockerfile {@code RUN} steps that
 * point pip at an Airlock (Artifact Registry) Python repository.
 *
 * <p>The steps are only generated when both the service-account secret name and the Airlock Python
 * repository name are present and non-empty in {@code parameters}; otherwise {@code
 * airlockConfig} is set to the empty string so the {@code ${airlockConfig}} placeholder in the
 * Dockerfile templates expands to nothing.
 *
 * <p>NOTE(review): the generated steps write the base64-encoded service-account key into {@code
 * .pypirc} and {@code pip.conf} inside the image being built - confirm that the affected build
 * stage is never published with those files in place.
 *
 * @param parameters mutable template parameter map; it is read for the two Airlock keys and
 *     always receives an {@code airlockConfig} entry
 */
private static void configureAirlock(Map<String, Object> parameters) {
  // Look the values up directly instead of using containsKey(): a key that is mapped to null
  // (e.g. a builder setter called with null) must behave as "not configured", not throw an NPE.
  Object secretName = parameters.get(SA_SECRET_NAME_KEY);
  Object pythonRepo = parameters.get(AIRLOCK_PYTHON_REPO_KEY);
  boolean airlockEnabled =
      secretName != null
          && pythonRepo != null
          && !secretName.toString().isEmpty()
          && !pythonRepo.toString().isEmpty();

  String baseConfig = "";
  if (airlockEnabled) {
    // The upper-case tokens below are placeholders that get substituted further down.
    baseConfig =
        "RUN mkdir -p $HOME/.pip\n"
            + "RUN mkdir -p $HOME/.config/pip"
            + "\n"
            + "RUN printf \""
            + "[distutils]\\n"
            + "index-servers =\\n"
            + " AIRLOCK_PYTHON_REPO_KEY"
            + "\\n\\n"
            + "[AIRLOCK_PYTHON_REPO_KEY]\\n"
            + "repository: https://us-python.pkg.dev/artifact-foundry-prod/AIRLOCK_PYTHON_REPO_KEY/\\n"
            + "username: _json_key_base64\\n"
            + "password: $(gcloud secrets versions access latest --secret=SA_SECRET_NAME_KEY | base64 -w 0)\""
            + " > $HOME/.pip/.pypirc"
            + "\n"
            + "RUN printf \""
            + "[global]\\n"
            + "index-url = "
            + "https://_json_key_base64:"
            + "$(gcloud secrets versions access latest --secret=SA_SECRET_NAME_KEY | base64 -w 0)"
            + "@us-python.pkg.dev/artifact-foundry-prod/AIRLOCK_PYTHON_REPO_KEY/simple/\""
            + " > $HOME/.config/pip/pip.conf";
    // String.replace() substitutes literally. replaceAll() would interpret '$' and '\' in the
    // configured values as regex group references / escapes and either corrupt the output or
    // throw.
    baseConfig =
        baseConfig
            .replace("AIRLOCK_PYTHON_REPO_KEY", pythonRepo.toString())
            .replace("SA_SECRET_NAME_KEY", secretName.toString());
  }
  parameters.put("airlockConfig", baseConfig);
}

/** Builder for {@link DockerfileGenerator}. */
public static class Builder {

Expand Down Expand Up @@ -332,6 +372,21 @@ public Builder addStringParameter(String parameter, String value) {
return addParameter(parameter, value);
}

/**
 * Stores the Airlock Python repository name as the {@code airlockPythonRepo} template
 * parameter.
 *
 * @param airlockPythonRepo repository name to record
 * @return this builder, for chaining
 */
public Builder setAirlockPythonRepo(String airlockPythonRepo) {
  parameters.put(AIRLOCK_PYTHON_REPO_KEY, airlockPythonRepo);
  return this;
}

/**
 * Stores the Airlock Java repository name as the {@code airlockJavaRepo} template parameter.
 *
 * <p>NOTE(review): no generator code visible here reads this key; presumably a Dockerfile
 * template consumes it - confirm.
 *
 * @param airlockJavaRepo repository name to record
 * @return this builder, for chaining
 */
public Builder setAirlockJavaRepo(String airlockJavaRepo) {
  parameters.put("airlockJavaRepo", airlockJavaRepo);
  return this;
}

/**
 * Stores the name of the secret holding the service-account key as the {@code saSecretName}
 * template parameter.
 *
 * @param secretName secret name to record
 * @return this builder, for chaining
 */
public Builder setServiceAccountSecretName(String secretName) {
  parameters.put(SA_SECRET_NAME_KEY, secretName);
  return this;
}

/**
* Set a custom parameter in the Dockerfile template.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py

RUN if ! [ -f requirements.txt ] ; then >&2 echo "error: no requirements.txt file found" && exit 1 ; fi

# Set up custom PyPi repository, if applicable
${airlockConfig}

# -r takes the requirements file as its argument, so it must directly precede the file path;
# any other flag placed right after -r would be read as the file name.
RUN pip install -U --require-hashes -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip download --require-hashes --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ WORKDIR $WORKDIR
# Throw if requirements.txt file not provided
RUN if ! [ -f requirements.txt ] ; then >&2 echo "error: no requirements.txt file found" && exit 1 ; fi

# Set up custom PyPi repository, if applicable
${airlockConfig}

# Install dependencies to launch the pipeline and download to reduce startup time
RUN python -m venv /venv \
&& /venv/bin/python -m ensurepip --upgrade \
Expand Down
17 changes: 11 additions & 6 deletions plugins/core-plugin/src/main/resources/Dockerfile-template-yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@ WORKDIR $WORKDIR
# Throw if requirements.txt file not provided
RUN if ! [ -f requirements.txt ] ; then >&2 echo "error: no requirements.txt file found" && exit 1 ; fi

# Set up custom PyPi repository, if applicable
${airlockConfig}

# Install dependencies to launch the pipeline and download to reduce startup time
# TODO - remove `pip uninstall apache-beam` line when repo is upgraded to Beam 2.59.0
RUN python -m venv /venv \
&& /venv/bin/pip uninstall apache-beam -y \
&& /venv/bin/pip install --require-hashes --no-cache-dir -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip install --require-hashes --no-deps --no-cache-dir -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip download --require-hashes --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \
&& rm -rf /usr/local/lib/python$PY_VERSION/site-packages \
&& rm -rf /usr/local/lib/python$PY_VERSION/site-packages \
&& mv /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/

# Cache provider environments for faster startup and expansion time
RUN python -m apache_beam.yaml.cache_provider_artifacts


#============================================================#
# Create Distroless xlang image compatible with YamlTemplate #
#============================================================#
Expand All @@ -44,8 +44,8 @@ ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py

# Copy template, python wheels and python launcher script from python-base
COPY --from=python-base /template /template
COPY --from=python-base /tmp/dataflow-requirements-cache /tmp/dataflow-requirements-cache
COPY --from=python-base /opt/google/dataflow/python_template_launcher /opt/google/dataflow/python_template_launcher
COPY --from=python-base /tmp/dataflow-requirements-cache /tmp/dataflow-requirements-cache

# Copy python and installed packages from python-base
COPY --from=python-base /usr/local/bin/python$PY_VERSION /usr/local/bin/python
Expand All @@ -61,6 +61,11 @@ COPY --from=python-base /usr/local/lib/libpython$PY_VERSION* /usr/local/lib/
COPY --from=python-base /bin/dash /bin/sh
COPY --from=python-base /usr/bin/which.debianutils /usr/bin/which

# Copy cached beam library cache from python-base
COPY --from=python-base /bin/mkdir /bin/mkdir
RUN mkdir -p /root/.apache_beam/cache
COPY --from=python-base /root/.apache_beam/cache /root/.apache_beam/cache

# Copy licenses
COPY --from=python-base /usr/licenses/ /usr/licenses/

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,6 @@ public void testGenerateYamlDockerfile() throws IOException, TemplateException {
assertThat(fileContents).contains("FROM a python container image");
assertThat(fileContents).contains("FROM a java container image");
assertThat(fileContents).contains("=py_version");
assertThat(fileContents)
.doesNotContainMatch(
"(?m)^(?!COPY main\\.py.*)(COPY(?!.*--from=).*/template.*$|COPY main\\.py.*)$");
assertThat(fileContents).contains("ENTRYPOINT [\"python/entry/point\"]");
}

Expand All @@ -212,6 +209,30 @@ public void testGenerateYamlDockerfileWithOtherFiles() throws IOException, Templ
assertThat(fileContents).contains("ENTRYPOINT [\"python/entry/point\"]");
}

/**
 * Generates a YAML Dockerfile with the service-account secret name and Airlock Python repository
 * set, and verifies that the pip configuration steps are rendered with the supplied values and
 * without any leftover placeholder tokens.
 */
@Test
public void testGenerateYamlDockerfileWithInternalMaven() throws IOException, TemplateException {
  String containerDir = outputFolder.getAbsolutePath() + "/" + containerName;
  new File(containerDir).mkdirs();

  createDockerfileGeneratorBuilder(Template.TemplateType.YAML, outputFolder)
      .setBasePythonContainerImage("a python container image")
      .setBaseJavaContainerImage("a java container image")
      .setPythonVersion("py_version")
      .setServiceAccountSecretName("someSecret")
      .setAirlockPythonRepo("airlockPythonRepo")
      .build()
      .generate();

  File outputFile = new File(containerDir + "/Dockerfile");
  assertTrue(outputFile.exists());

  String fileContents = Files.asCharSource(outputFile, StandardCharsets.UTF_8).read();
  assertThat(fileContents).contains("FROM a python container image");
  assertThat(fileContents).contains("FROM a java container image");
  assertThat(fileContents).contains("=py_version");

  // The supplied values must appear in the rendered pip configuration...
  assertThat(fileContents).contains("gcloud secrets versions access latest --secret=someSecret");
  assertThat(fileContents)
      .contains("https://us-python.pkg.dev/artifact-foundry-prod/airlockPythonRepo");
  // ...and none of the generator's internal placeholder tokens may survive substitution.
  assertThat(fileContents).doesNotContain("AIRLOCK_PYTHON_REPO_KEY");
  assertThat(fileContents).doesNotContain("SA_SECRET_NAME_KEY");
}

@Test
public void testGenerateDockerfileAddParameterWithNullOrEmptyParameterName() {
DockerfileGenerator.Builder dockerfileBuilder =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.google.cloud.teleport.plugin.TemplateSpecsGenerator;
import com.google.cloud.teleport.plugin.model.ImageSpec;
import com.google.cloud.teleport.plugin.model.TemplateDefinitions;
import com.google.common.base.Strings;
import freemarker.template.TemplateException;
import java.io.File;
import java.io.FileWriter;
Expand Down Expand Up @@ -151,6 +152,17 @@ public class TemplatesStageMojo extends TemplatesBaseMojo {
@Parameter(defaultValue = "${unifiedWorker}", readonly = true, required = false)
protected boolean unifiedWorker;

@Parameter(defaultValue = "${saSecretName}", readonly = true, required = false)
protected String saSecretName;

@Parameter(defaultValue = "${airlockPythonRepo}", readonly = true, required = false)
protected String airlockPythonRepo;

@Parameter(defaultValue = "${airlockJavaRepo}", readonly = true, required = false)
protected String airlockJavaRepo;

private boolean internalMaven;

public TemplatesStageMojo() {}

public TemplatesStageMojo(
Expand Down Expand Up @@ -196,6 +208,7 @@ public TemplatesStageMojo(
this.pythonVersion = pythonVersion;
this.beamVersion = beamVersion;
this.unifiedWorker = unifiedWorker;
this.internalMaven = false;
}

public void execute() throws MojoExecutionException {
Expand Down Expand Up @@ -371,6 +384,16 @@ protected String stageFlexTemplate(
TemplateDefinitions definition, ImageSpec imageSpec, BuildPluginManager pluginManager)
throws MojoExecutionException, IOException, InterruptedException, TemplateException {

// These are set by the .mvn/settings.xml file. This tells the plugin to use Airlock repos
// for building artifacts in Dockerfile-based images (XLANG, PYTHON, YAML). Airlock deps are
// only available when running PRs on DataflowTemplates GitHub repo and when releasing
// internally, so avoid specifying these 3 parameters when building custom templates externally.
if (!Strings.isNullOrEmpty(saSecretName)
&& !Strings.isNullOrEmpty(airlockPythonRepo)
&& !Strings.isNullOrEmpty(airlockJavaRepo)) {
internalMaven = true;
}

// Override some image spec attributes available only during staging/release:
String version = TemplateDefinitionsParser.parseVersion(stagePrefix);
String containerName = definition.getTemplateAnnotation().flexContainerName();
Expand Down Expand Up @@ -558,22 +581,30 @@ private void stageFlexJavaTemplate(
destRequirements.toPath(),
StandardCopyOption.REPLACE_EXISTING);
}

// Generate Dockerfile
Set<String> directoriesToCopy = Set.of(containerName);
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
beamVersion,
containerName,
outputClassesDirectory)
.setBasePythonContainerImage(basePythonContainerImage)
.setBaseJavaContainerImage(baseContainerImage)
.setPythonVersion(pythonVersion)
.setEntryPoint(entryPoint)
.setCommandSpec(xlangCommandSpec)
.setFilesToCopy(filesToCopy)
.setDirectoriesToCopy(directoriesToCopy)
.build()
.generate();
DockerfileGenerator.Builder dockerfileBuilder =
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
beamVersion,
containerName,
outputClassesDirectory)
.setBasePythonContainerImage(basePythonContainerImage)
.setBaseJavaContainerImage(baseContainerImage)
.setPythonVersion(pythonVersion)
.setEntryPoint(entryPoint)
.setCommandSpec(xlangCommandSpec)
.setFilesToCopy(filesToCopy)
.setDirectoriesToCopy(directoriesToCopy);

// Set Airlock parameters
if (internalMaven) {
dockerfileBuilder
.setServiceAccountSecretName(saSecretName)
.setAirlockPythonRepo(airlockPythonRepo);
}

dockerfileBuilder.build().generate();
}

// Copy java classes and libs to build directory
Expand Down Expand Up @@ -654,18 +685,28 @@ private void stageFlexYamlTemplate(
if (entryPoint.isEmpty()) {
entryPoint = List.of(pythonTemplateLauncherEntryPoint);
}
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
beamVersion,
containerName,
outputClassesDirectory)
.setBasePythonContainerImage(basePythonContainerImage)
.setBaseJavaContainerImage(baseContainerImage)
.setPythonVersion(pythonVersion)
.setEntryPoint(entryPoint)
.setFilesToCopy(filesToCopy)
.build()
.generate();

// Generate Dockerfile
DockerfileGenerator.Builder dockerfileBuilder =
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
beamVersion,
containerName,
outputClassesDirectory)
.setBasePythonContainerImage(basePythonContainerImage)
.setBaseJavaContainerImage(baseContainerImage)
.setPythonVersion(pythonVersion)
.setEntryPoint(entryPoint)
.setFilesToCopy(filesToCopy);

// Set Airlock parameters
if (internalMaven) {
dockerfileBuilder
.setServiceAccountSecretName(saSecretName)
.setAirlockPythonRepo(airlockPythonRepo)
.setAirlockJavaRepo(airlockJavaRepo);
}
dockerfileBuilder.build().generate();
}

stageYamlUsingDockerfile(imagePath, containerName);
Expand Down Expand Up @@ -742,16 +783,25 @@ private void stageFlexPythonTemplate(
StandardCopyOption.REPLACE_EXISTING);
}

DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
beamVersion,
containerName,
targetDirectory)
.setBasePythonContainerImage(basePythonContainerImage)
.setFilesToCopy(filesToCopy)
.setEntryPoint(entryPoint)
.build()
.generate();
// Generate Dockerfile
DockerfileGenerator.Builder dockerfileBuilder =
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
beamVersion,
containerName,
targetDirectory)
.setBasePythonContainerImage(basePythonContainerImage)
.setFilesToCopy(filesToCopy)
.setEntryPoint(entryPoint);

// Set Airlock parameters
if (internalMaven) {
dockerfileBuilder
.setServiceAccountSecretName(saSecretName)
.setAirlockPythonRepo(airlockPythonRepo);
}

dockerfileBuilder.build().generate();
}
stagePythonUsingDockerfile(imagePath, containerName);

Expand Down

0 comments on commit 5cc6570

Please sign in to comment.