Skip to content

Commit

Permalink
Integrated code lifecycle: Regularly clean up stranded build job cont…
Browse files Browse the repository at this point in the history
…ainers (#8312)
  • Loading branch information
laurenzfb authored and Stephan Krusche committed Apr 4, 2024
1 parent 5cfbe76 commit 123314a
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static de.tum.in.www1.artemis.config.Constants.PROFILE_BUILDAGENT;

import java.time.Instant;
import java.time.ZonedDateTime;
import java.time.temporal.ChronoUnit;
import java.util.*;
Expand Down Expand Up @@ -44,6 +45,8 @@ public class LocalCIDockerService {

private final HazelcastInstance hazelcastInstance;

// True until the first cleanup run has been started; that first run removes every build container regardless of its age,
// all later runs only remove containers that exceed the configured expiry (see cleanUpContainers)
private boolean isFirstCleanup = true;

@Value("${artemis.continuous-integration.image-cleanup.enabled:false}")
private Boolean imageCleanupEnabled;

Expand All @@ -53,27 +56,57 @@ public class LocalCIDockerService {
@Value("${artemis.continuous-integration.build-container-prefix:local-ci-}")
private String buildContainerPrefix;

// With the default value, build containers that were created more than 5 minutes before a cleanup run starts are considered stranded and are removed
@Value("${artemis.continuous-integration.container-cleanup.expiry-minutes:5}")
private int containerExpiryMinutes;

// With the default value, the cleanup is triggered every 60 minutes
@Value("${artemis.continuous-integration.container-cleanup.cleanup-schedule-minutes:60}")
private int containerCleanupScheduleMinutes;

/**
 * Creates the service and stores the collaborators handed in by the caller.
 *
 * @param dockerClient      the client used to issue commands to the Docker daemon
 * @param hazelcastInstance the Hazelcast instance kept for later use by this service
 */
public LocalCIDockerService(DockerClient dockerClient, HazelcastInstance hazelcastInstance) {
    this.hazelcastInstance = hazelcastInstance;
    this.dockerClient = dockerClient;
}

/**
 * Schedules the cleanup of stranded build containers once the application has started.
 * <p>
 * The first run is executed 10 seconds after startup, every further run follows at a fixed rate of
 * {@code containerCleanupScheduleMinutes} minutes. All runs go through {@link #cleanUpContainers()}, which removes
 * every build container on its first invocation and only expired ones afterwards.
 */
@EventListener(ApplicationReadyEvent.class)
public void applicationReady() {
    // NOTE: we delay the first run after startup, because the cleanup can take several seconds and would block the startup of the build agent otherwise.
    // A single scheduler is used for the initial and the recurring cleanup so that two cleanups never remove the same containers concurrently.
    ScheduledExecutorService scheduledExecutorService = Executors.newScheduledThreadPool(1);
    scheduledExecutorService.scheduleAtFixedRate(() -> {
        try {
            cleanUpContainers();
        }
        catch (Exception e) {
            // An exception that escapes a fixed-rate task suppresses all subsequent executions, so it is logged here instead
            log.error("Cleanup of stranded build containers failed", e);
        }
    }, 10, containerCleanupScheduleMinutes * 60L, TimeUnit.SECONDS);
}

/**
 * Removes stranded build containers, i.e. containers whose first name starts with the configured build container prefix.
 * <p>
 * The first invocation removes all such containers regardless of their age. Every later invocation only removes
 * those whose creation time lies more than {@code containerExpiryMinutes} minutes in the past. A container that
 * cannot be removed is logged and skipped so that the remaining containers are still cleaned up.
 */
public void cleanUpContainers() {
    log.info("Start cleanup stranded build containers");

    // Only the very first run ignores the age. The flag is cleared before Docker is contacted, so a failing
    // Docker call cannot lead to a second "remove everything" run.
    final boolean removeRegardlessOfAge = isFirstCleanup;
    isFirstCleanup = false;

    // Docker reports the creation time of a container as Unix epoch seconds, so the age is computed in seconds
    final long nowEpochSeconds = Instant.now().getEpochSecond();
    final long expirySeconds = containerExpiryMinutes * 60L;
    final String namePrefix = "/" + buildContainerPrefix;

    List<Container> buildContainers = dockerClient.listContainersCmd().withShowAll(true).exec().stream().filter(container -> {
        // Guard against containers without any name instead of failing the whole cleanup
        String[] names = container.getNames();
        return names != null && names.length > 0 && names[0].startsWith(namePrefix);
    }).filter(container -> {
        if (removeRegardlessOfAge) {
            return true;
        }
        Long created = container.getCreated();
        return created != null && (nowEpochSeconds - created) > expirySeconds;
    }).toList();

    log.info("Found {} stranded build containers", buildContainers.size());
    for (Container container : buildContainers) {
        try {
            dockerClient.removeContainerCmd(container.getId()).withForce(true).exec();
        }
        catch (RuntimeException e) {
            // E.g. the container disappeared in the meantime; continue with the remaining containers
            log.warn("Could not remove stranded build container {}", container.getId(), e);
        }
    }
    log.info("Cleanup stranded build containers done");
}

/**
Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/config/application-buildagent.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ artemis:
enabled: false
expiry-days: 2
cleanup-schedule-time: 0 0 3 * * *
container-cleanup:
expiry-minutes: 5
cleanup-schedule-minutes: 60
git:
name: Artemis
email: artemis@xcit.tum.de
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import static org.mockito.Mockito.*;

import java.time.ZonedDateTime;
import java.util.List;

import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;

import com.github.dockerjava.api.command.InspectImageCmd;
import com.github.dockerjava.api.command.ListContainersCmd;
import com.github.dockerjava.api.exception.NotFoundException;
import com.github.dockerjava.api.model.Container;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.map.IMap;

Expand Down Expand Up @@ -86,4 +89,33 @@ void testPullDockerImage() {
// Verify that pullImageCmd() was called.
verify(dockerClient, times(1)).pullImageCmd("test-image-name");
}

@Test
void testRemoveStrandedContainers() {

    // Mocks: listing the containers yields exactly one container that carries the build container prefix
    ListContainersCmd listContainersCmd = mock(ListContainersCmd.class);
    doReturn(listContainersCmd).when(dockerClient).listContainersCmd();
    doReturn(listContainersCmd).when(listContainersCmd).withShowAll(true);

    Container mockContainer = mock(Container.class);
    doReturn(List.of(mockContainer)).when(listContainersCmd).exec();
    doReturn(new String[] { "/local-ci-dummycontainer" }).when(mockContainer).getNames();
    doReturn("dummy-container-id").when(mockContainer).getId();

    // Mock the container creation time to be older than 5 minutes. Docker reports the creation time in epoch seconds,
    // so the value is expressed in seconds (6 minutes ago).
    doReturn(System.currentTimeMillis() / 1000L - 6 * 60L).when(mockContainer).getCreated();

    localCIDockerService.cleanUpContainers();

    // Verify that exactly this container was removed
    verify(dockerClient, times(1)).removeContainerCmd("dummy-container-id");

    // Mock a container creation time that has not reached the expiry.
    // NOTE(review): this value is in epoch milliseconds although Docker reports epoch seconds; it is kept because it counts
    // as "not expired" under either unit. Switch to System.currentTimeMillis() / 1000L once the service is confirmed to compare in seconds.
    doReturn(System.currentTimeMillis()).when(mockContainer).getCreated();

    localCIDockerService.cleanUpContainers();

    // Verify that the container was not removed a second time
    verify(dockerClient, times(1)).removeContainerCmd("dummy-container-id");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ public DockerClient dockerClient() throws InterruptedException {
doReturn(removeImageCmd).when(dockerClient).removeImageCmd(anyString());
doNothing().when(removeImageCmd).exec();

// Mock removeContainerCmd
RemoveContainerCmd removeContainerCmd = mock(RemoveContainerCmd.class);
doReturn(removeContainerCmd).when(dockerClient).removeContainerCmd(anyString());
doReturn(removeContainerCmd).when(removeContainerCmd).withForce(true);

return dockerClient;
}
}

0 comments on commit 123314a

Please sign in to comment.