Skip to content

Commit

Permalink
CS-968 Switch client library from docker-client to docker-java (pull …
Browse files Browse the repository at this point in the history
…request #119)

* Migrate getContainerEvents
* Migrate startDockerContainer and setSwarmServiceReplicasToOne
* Migrate DockerControlApiIntegrationTest to use new client
* Adjust docker http client timeouts
* Add another getDockerClientNew method with no args, use default server
* Massive change to migrate all integration test client operations use new client
* Migrate create container and create service
* Migrate delete image
* Slight tweaks to BUSYBOX constants and versions
* Migrate inspect image / get image by id
* Standardize docker-java exception handling
* Migrate list images
* Migrate ping
* Reuse RETURN_SELF answer in test mocks
* Migrate container/service logs
* Migrate getTaskForService
* Migrate pull image (with auth)
* Add support for generic resources (i.e. gpus) using a fork of docker-java
* Migrate TASK_STATE_FAILED to TaskState.FAILED
* Move client config into its own method
* Migrate AuthConfig and getting registry URL for image
* Complete removal of docker-client
* Fix hub ping
* Clean up some more fully qualified classes left over from transition
* Changelog

Approved-by: James Ransford
Approved-by: Bin Zhang
  • Loading branch information
johnflavin-fw committed May 31, 2024
1 parent 973ec78 commit a8f4615
Show file tree
Hide file tree
Showing 23 changed files with 1,098 additions and 1,349 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@
Not yet released.

* **Improvement** [CS-946][] Prevent setting mutually distinct k8s PVC mounting options
* **Bugfix** [CS-968][] Switch the docker API library we use from [docker-client][] to [docker-java][].
This should restore CS functionality on docker engine v25 and higher.

[CS-946]: https://radiologics.atlassian.net/browse/CS-946
[CS-968]: https://radiologics.atlassian.net/browse/CS-968
[docker-client]: https://github.com/dmandalidis/docker-client
[docker-java]: https://github.com/docker-java/docker-java

## 3.4.3
[Released](https://bitbucket.org/xnatdev/container-service/src/3.4.3/).
Expand Down
24 changes: 5 additions & 19 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ dependencyManagement {
}
}
}
def vDockerClient = "6.0.4"

def vDockerJava = "3.4.0.1"
def vAutoValue = "1.3"
def vKubernetesClient = "15.0.+"
def vPowerMock = "1.7.0"
def vGson = dependencyManagement.importedProperties["gson.version"]
def vJackson = dependencyManagement.importedProperties["jackson.version"]
def vSpringSecurity = dependencyManagement.importedProperties["spring-security.version"]
def vActiveMQ = dependencyManagement.importedProperties["activemq.version"]
def vJersey = "3.0.6"
def vBouncyCastle = "1.64" // Included in xnat-web via spring-security-jwt 1.1.1, see XNAT-7907

// Use this configuration to put dependencies into the fat jar
Expand All @@ -67,23 +67,8 @@ dependencies {

compileOnly "com.google.auto.value:auto-value:${vAutoValue}"

implementAndInclude ("org.mandas:docker-client:${vDockerClient}") {
exclude group: "ch.qos.logback"
exclude group: "org.slf4j"
exclude group: "org.bouncycastle" // included in xnat-web via spring-security-jwt
exclude group: "com.fasterxml.jackson.core"
exclude group: "org.apache.commons", module: "commons-compress" // included in xnat-web via parent
}

implementAndInclude "org.glassfish.jersey.core:jersey-client:${vJersey}"
implementAndInclude "org.glassfish.jersey.inject:jersey-hk2:${vJersey}"
implementAndInclude ("org.glassfish.jersey.connectors:jersey-apache-connector:${vJersey}") {
exclude group: "org.apache.httpcomponents", module: "httpclient" // included in xnat-web directly
}
implementAndInclude ("org.glassfish.jersey.media:jersey-media-json-jackson:${vJersey}") {
exclude group: "com.fasterxml.jackson.core" // Included in xnat-web directly
exclude group: "com.fasterxml.jackson.module", module: "jackson-module-jaxb-annotations" // Included in xnat-web via spawner -> jackson-dataformat-xml
}
implementAndInclude "com.github.docker-java:docker-java-core:${vDockerJava}"
implementAndInclude "com.github.docker-java:docker-java-transport-okhttp:${vDockerJava}"

implementAndInclude ("io.kubernetes:client-java:${vKubernetesClient}") {
exclude group: "com.google.code.gson"
Expand All @@ -95,6 +80,7 @@ dependencies {
exclude group: "org.slf4j"
exclude group: "org.bouncycastle"
exclude group: "org.yaml", module: "snakeyaml"
exclude group: "com.squareup.okhttp3", module: "okhttp" // Included via docker-java-transport-okhttp
}
implementAndInclude ("io.kubernetes:client-java-api:${vKubernetesClient}") { // Explicitly including to exclude transitive deps
exclude group: "com.google.code.gson"
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/nrg/containers/api/ContainerControlApi.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package org.nrg.containers.api;

import org.mandas.docker.client.exceptions.ServiceNotFoundException;
import org.mandas.docker.client.exceptions.TaskNotFoundException;
import org.nrg.containers.events.model.DockerContainerEvent;
import org.nrg.containers.exceptions.ContainerBackendException;
import org.nrg.containers.exceptions.ContainerException;
import org.nrg.containers.exceptions.DockerServerException;
import org.nrg.containers.exceptions.NoContainerServerException;
import org.nrg.containers.exceptions.NoDockerServerException;
import org.nrg.containers.exceptions.ServiceNotFoundException;
import org.nrg.containers.exceptions.TaskNotFoundException;
import org.nrg.containers.model.command.auto.ResolvedCommand;
import org.nrg.containers.model.container.auto.Container;
import org.nrg.containers.model.container.auto.ServiceTask;
Expand Down
1,092 changes: 535 additions & 557 deletions src/main/java/org/nrg/containers/api/DockerControlApi.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
package org.nrg.containers.events;

import com.github.dockerjava.api.model.TaskState;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.mandas.docker.client.exceptions.ServiceNotFoundException;
import org.mandas.docker.client.exceptions.TaskNotFoundException;
import org.nrg.containers.api.ContainerControlApi;
import org.nrg.containers.api.KubernetesClientFactory;
import org.nrg.containers.events.model.DockerContainerEvent;
import org.nrg.containers.events.model.ServiceTaskEvent;
import org.nrg.containers.exceptions.*;
import org.nrg.containers.exceptions.ContainerException;
import org.nrg.containers.exceptions.DockerServerException;
import org.nrg.containers.exceptions.InvalidDefinitionException;
import org.nrg.containers.exceptions.NoContainerServerException;
import org.nrg.containers.exceptions.NoDockerServerException;
import org.nrg.containers.exceptions.ServiceNotFoundException;
import org.nrg.containers.exceptions.TaskNotFoundException;
import org.nrg.containers.model.container.auto.Container;
import org.nrg.containers.model.container.auto.ServiceTask;
import org.nrg.containers.model.server.docker.DockerServerBase.DockerServer;
Expand All @@ -28,8 +33,6 @@
import java.util.Date;
import java.util.List;

import static org.mandas.docker.client.messages.swarm.TaskStatus.TASK_STATE_FAILED;

@Slf4j
@Component
public class ContainerStatusUpdater implements Runnable {
Expand Down Expand Up @@ -339,7 +342,7 @@ private void throwLostTaskEventForService(@Nonnull final Container service) {
.serviceId(service.serviceId())
.taskId(null)
.nodeId(null)
.status(TASK_STATE_FAILED)
.status(TaskState.FAILED.getValue())
.swarmNodeError(true)
.statusTime(null)
.message(ServiceTask.swarmNodeErrMsg)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.nrg.containers.exceptions;

public class ServiceNotFoundException extends Exception {
public ServiceNotFoundException(Throwable e) {
super(e);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.nrg.containers.exceptions;

public class TaskNotFoundException extends Exception {
public TaskNotFoundException(Throwable e) {
super(e);
}
}
Original file line number Diff line number Diff line change
@@ -1,29 +1,26 @@
package org.nrg.containers.model.container.auto;

import com.github.dockerjava.api.model.Task;
import com.github.dockerjava.api.model.TaskState;
import com.github.dockerjava.api.model.TaskStatus;
import com.github.dockerjava.api.model.TaskStatusContainerStatus;
import com.google.auto.value.AutoValue;
import org.mandas.docker.client.messages.swarm.ContainerStatus;
import org.mandas.docker.client.messages.swarm.Task;
import org.mandas.docker.client.messages.swarm.TaskStatus;
import org.apache.commons.lang3.StringUtils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Date;
import javax.validation.constraints.NotNull;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@AutoValue
public abstract class ServiceTask {
private static final Pattern successStatusPattern = Pattern.compile(TaskStatus.TASK_STATE_COMPLETE);
private static final Pattern successStatusPattern = Pattern.compile(TaskState.COMPLETE.getValue());
private static final Pattern exitStatusPattern = Pattern.compile(
StringUtils.join(
Arrays.asList(TaskStatus.TASK_STATE_FAILED, TaskStatus.TASK_STATE_COMPLETE,
TaskStatus.TASK_STATE_REJECTED, TaskStatus.TASK_STATE_SHUTDOWN), '|'));
private static final Pattern hasNotStartedPattern = Pattern.compile(
StringUtils.join(
Arrays.asList(TaskStatus.TASK_STATE_NEW, TaskStatus.TASK_STATE_ALLOCATED, TaskStatus.TASK_STATE_PENDING,
TaskStatus.TASK_STATE_ASSIGNED, TaskStatus.TASK_STATE_ACCEPTED, TaskStatus.TASK_STATE_PREPARING,
TaskStatus.TASK_STATE_READY, TaskStatus.TASK_STATE_STARTING), '|'));
Stream.of(TaskState.FAILED, TaskState.COMPLETE,
TaskState.REJECTED, TaskState.SHUTDOWN)
.map(TaskState::getValue)
.collect(Collectors.joining("|")));

public static String swarmNodeErrMsg = "Swarm node error";

Expand All @@ -38,37 +35,38 @@ public abstract class ServiceTask {
@Nullable public abstract String err();
@Nullable public abstract Long exitCode();

public static ServiceTask create(final @Nonnull Task task, final String serviceId) {
final ContainerStatus containerStatus = task.status().containerStatus();
Long exitCode = containerStatus == null ? null : containerStatus.exitCode();
public static ServiceTask create(final @NotNull Task task, final String serviceId) {
final TaskStatus taskStatus = task.getStatus();
final TaskStatusContainerStatus containerStatus = taskStatus.getContainerStatus();
Long exitCode = containerStatus == null ? null : containerStatus.getExitCodeLong();
// swarmNodeError occurs when node is terminated / spot instance lost while service still trying to run on it
// Criteria: current state = [not an exit status] AND either desired state = shutdown OR exit code = -1
// OR current state = shutdown
String curState = task.status().state();
String msg = task.status().message();
String err = task.status().err();
if (curState.equals(TaskStatus.TASK_STATE_PENDING)) {
TaskState curState = taskStatus.getState();
String msg = taskStatus.getMessage();
String err = taskStatus.getErr();
if (curState.equals(TaskState.PENDING)) {
msg = "";
err = "";
}
boolean swarmNodeError = (!isExitStatus(curState) &&
(task.desiredState().equals(TaskStatus.TASK_STATE_SHUTDOWN) || (exitCode != null && exitCode < 0))) ||
curState.equals(TaskStatus.TASK_STATE_SHUTDOWN);
boolean swarmNodeError = (!isExitStatus(curState.getValue()) &&
(task.getDesiredState().equals(TaskState.SHUTDOWN) || (exitCode != null && exitCode < 0))) ||
curState.equals(TaskState.SHUTDOWN);

if (swarmNodeError) {
msg = swarmNodeErrMsg;
}
return ServiceTask.builder()
.serviceId(serviceId)
.taskId(task.id())
.nodeId(task.nodeId())
.status(task.status().state())
.taskId(task.getId())
.nodeId(task.getNodeId())
.status(curState.getValue())
.swarmNodeError(swarmNodeError)
.statusTime(task.status().timestamp().toString())
.statusTime(taskStatus.getTimestamp())
.message(msg)
.err(err)
.exitCode(exitCode)
.containerId(containerStatus == null ? null : containerStatus.containerId())
.containerId(containerStatus == null ? null : containerStatus.getContainerID())
.build();
}

Expand Down Expand Up @@ -104,11 +102,6 @@ public boolean isSuccessfulStatus(){
return isSuccessfulStatus(status);
}

public boolean hasNotStarted() {
final String status = status();
return status == null || hasNotStartedPattern.matcher(status).matches();
}

public static Builder builder() {
return new AutoValue_ServiceTask.Builder();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
public interface DockerHubService extends BaseHibernateService<DockerHubEntity> {
DockerHubEntity retrieve(String name) throws NotUniqueException;
DockerHubEntity get(String name) throws NotUniqueException, NotFoundException;
DockerHubEntity getByUrl(final String url) throws NotUniqueException, NotFoundException;
DockerHub retrieveHub(long id);
DockerHub retrieveHub(String name) throws NotUniqueException;
DockerHub getHub(long hubId) throws NotFoundException;
DockerHub getHub(String hubName) throws NotFoundException, NotUniqueException;
DockerHub getByUrl(final String url);
List<DockerHub> getHubs();

void setDefault(DockerHub dockerHub, String username, String reason);
Expand All @@ -34,8 +34,6 @@ public interface DockerHubService extends BaseHibernateService<DockerHubEntity>
long getDefaultHubId();
DockerHub getDefault();

DockerHub getHubForImage(String imageName);

class DockerHubDeleteDefaultException extends RuntimeException {
public DockerHubDeleteDefaultException(final String message) {
super(message);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.nrg.containers.services.impl;

import com.github.dockerjava.api.model.TaskState;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
Expand All @@ -8,7 +9,6 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.mandas.docker.client.messages.swarm.TaskStatus;
import org.nrg.action.ClientException;
import org.nrg.containers.api.ContainerControlApi;
import org.nrg.containers.api.LogType;
Expand Down Expand Up @@ -288,7 +288,7 @@ public String apply(final Exception input) {
: "";
final boolean startsWithFailed = containerStatus.startsWith(PersistentWorkflowUtils.FAILED);
if (startsWithFailed ||
containerStatus.equals(TaskStatus.TASK_STATE_FAILED) ||
containerStatus.equals(TaskState.FAILED.getValue()) ||
containerStatus.equals("die")) {
// This history entry should give us the details that we need
if (startsWithFailed) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.dockerjava.api.model.TaskState;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.common.cache.CacheBuilder;
Expand All @@ -13,7 +14,6 @@
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.mandas.docker.client.messages.swarm.TaskStatus;
import org.nrg.action.ClientException;
import org.nrg.containers.api.ContainerControlApi;
import org.nrg.containers.api.LogType;
Expand Down Expand Up @@ -1072,7 +1072,7 @@ public boolean restartService(Container service, UserI userI) {
final ContainerHistory newHistoryItem = ContainerHistory.builder()
.entityType("service")
.entityId(null)
.status(TaskStatus.TASK_STATE_FAILED)
.status(TaskState.FAILED.getValue())
.exitCode("126")
.timeRecorded(now)
.externalTimestamp(String.valueOf(now.getTime()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.mandas.docker.client.ImageRef;
import org.nrg.containers.daos.DockerHubDao;
import org.nrg.containers.exceptions.NotUniqueException;
import org.nrg.containers.model.dockerhub.DockerHubBase.DockerHub;
Expand Down Expand Up @@ -62,16 +61,18 @@ public DockerHubEntity get(final String name) throws NotUniqueException, NotFoun
}

@Override
public DockerHubEntity getByUrl(final String url) throws NotUniqueException, NotFoundException {
DockerHubEntity dockerHubEntity = getDao().findByUrl(url);
if (dockerHubEntity == null && url.startsWith("https")) {
// try with http (org.mandas.docker.client.ImageRef always returns https)
dockerHubEntity = getDao().findByUrl(url.replace("https", "http"));
}
if (dockerHubEntity == null) {
throw new NotFoundException("Could not find hub with name " + url);
public DockerHub getByUrl(final String url) {
try {
DockerHubEntity entity = getDao().findByUrl(url);
if (entity == null && url.startsWith("https")) {
// try with http (org.mandas.docker.client.ImageRef always returns https)
entity = getDao().findByUrl(url.replace("https", "http"));
}
return toPojo(entity, getDefaultHubId());
} catch (NotUniqueException e) {
log.error("Found multiple DockerHubs with url {}", url);
return null;
}
return dockerHubEntity;
}

@Override
Expand Down Expand Up @@ -148,18 +149,6 @@ public DockerHub getDefault() {
return retrieveHub(getDefaultHubId());
}

@Override
public DockerHub getHubForImage(String imageName) {
final String url = new ImageRef(imageName).getRegistryUrl();
DockerHubEntity entity = null;
try {
entity = getByUrl(url);
} catch (NotUniqueException | NotFoundException e) {
log.error("Unable to locate docker hub for url {}", url, e);
}
return toPojo(entity, getDefaultHubId());
}

@Override
public void setDefault(final DockerHub dockerHub, final String username, final String reason) {
setDefault(dockerHub.id(), username, reason);
Expand Down
Loading

0 comments on commit a8f4615

Please sign in to comment.