From 77808cd0e12d4fa92b0b930e4185f07a7a17c361 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Tue, 21 Jun 2022 14:54:07 +1200 Subject: [PATCH] Using parts of #344 by @obasekiosa (thanks!), and Commons IO to traverse and delete old files and directories. --- pom.xml | 6 ++ .../java/org/commonwl/view/Scheduler.java | 92 +++++++++++++++++++ src/main/resources/application.properties | 6 ++ 3 files changed, 104 insertions(+) diff --git a/pom.xml b/pom.xml index 2ff6c59a..8d698292 100644 --- a/pom.xml +++ b/pom.xml @@ -145,6 +145,12 @@ commons-compress 1.21 + + + commons-io + commons-io + 2.11.0 + org.springframework.boot diff --git a/src/main/java/org/commonwl/view/Scheduler.java b/src/main/java/org/commonwl/view/Scheduler.java index b8a73c7c..6bc75835 100644 --- a/src/main/java/org/commonwl/view/Scheduler.java +++ b/src/main/java/org/commonwl/view/Scheduler.java @@ -1,6 +1,9 @@ package org.commonwl.view; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.file.AccumulatorPathVisitor; +import org.apache.commons.io.filefilter.AgeFileFilter; import org.commonwl.view.workflow.QueuedWorkflowRepository; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -9,8 +12,21 @@ import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; import java.util.Calendar; +import java.util.Collections; import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; /** * Scheduler class for recurrent processes. 
@@ -24,6 +40,16 @@ public class Scheduler { @Value("${queuedWorkflowAgeLimitHours}") private Integer QUEUED_WORKFLOW_AGE_LIMIT_HOURS; + @Value("${tmpDirAgeLimitDays}") + private Integer TMP_DIR_AGE_LIMIT_DAYS; + + @Value("${bundleStorage}") + private String bundleStorage; + @Value("${graphvizStorage}") + private String graphvizStorage; + @Value("${gitStorage}") + private String gitStorage; + @Autowired public Scheduler(QueuedWorkflowRepository queuedWorkflowRepository) { this.queuedWorkflowRepository = queuedWorkflowRepository; @@ -55,4 +81,70 @@ public void removeOldQueuedWorkflowEntries() { logger.info(queuedWorkflowRepository.deleteByTempRepresentation_RetrievedOnLessThanEqual(removeTime) + " Old queued workflows removed"); } + + /** + * Scheduled function to delete old temporary directories. + * + *

Will scan each temporary directory (graphviz, RO, git), searching + * for files exceeding a specified threshold.

+ * + *

It scans the first level directories, i.e. it does not recursively + * scan directories. So it will delete any RO or Git repository directories + * that exceed the threshold. Similarly, it will delete any graph (svg, png, + * etc) that also exceeds it.

+ * + *

Errors logged through Logger. Settings in Spring application properties + * file.

+     *
+     * @since 1.4.5
+     */
+    @Scheduled(cron = "${cron.clearTmpDir}")
+    public void clearTmpDir() {
+        // Temporary files used for graphviz, RO, and git may be stored in different
+        // locations, so we will collect all of them here.
+        List<String> temporaryDirectories = Stream.of(bundleStorage, graphvizStorage, gitStorage)
+                .distinct()
+                .toList();
+        temporaryDirectories.forEach(this::clearDirectory);
+    }
+
+    /**
+     * For a given temporary directory, lists its first-level entries (not
+     * recursively) and deletes every file or directory older than the age limit.
+     *
+     * @since 1.4.5
+     * @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/AgeFileFilter.html">AgeFileFilter</a>
+     * @param temporaryDirectory temporary directory
+     */
+    private void clearDirectory(String temporaryDirectory) {
+        final Path dir = Paths.get(temporaryDirectory);
+        final Instant cutoff = Instant.now().minus(Duration.ofDays(TMP_DIR_AGE_LIMIT_DAYS));
+        // TODO: Commons IO 2.12 has a constructor that takes an Instant; drop the Date#from call here when we upgrade.
+        final AgeFileFilter tooOldFilter = new AgeFileFilter(Date.from(cutoff));
+
+        // List only the direct children. A walkFileTree with maxDepth 1 reports those children through
+        // visitFile (even when they are directories), so an accumulating visitor's directory list would
+        // hold nothing but the storage root itself, which must never be deleted.
+        final List<Path> tooOld;
+        try (Stream<Path> children = Files.list(dir)) {
+            tooOld = children.filter(child -> tooOldFilter.accept(child.toFile())).toList();
+        } catch (IOException | java.io.UncheckedIOException e) {
+            // Missing or unreadable directory (disk error, OS error, etc.): nothing to clean up, so just a warning.
+            logger.warn(String.format("Unexpected I/O error was thrown walking directory [%s]: %s", dir.toString(), e.getMessage()), e);
+            return;
+        }
+
+        // Delete the old files and directories found above.
+        tooOld.forEach(tooOldDeleteMe -> {
+            File fileToDelete = tooOldDeleteMe.toFile();
+            try {
+                FileUtils.forceDelete(fileToDelete);
+            } catch (IOException e) {
+                // Here we probably have a more serious case. Since the Git repository, RO directory, or graphs are
+                // not expected to be in use, and the application must have access, I/O errors are not expected and
+                // must be treated as errors.
+                logger.error(String.format("Failed to delete old temporary file or directory [%s]: %s", fileToDelete.getAbsolutePath(), e.getMessage()), e);
+            }
+        });
+    }
 }
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index f47d9986..e0ffadc6 100644
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -75,6 +75,12 @@ cron.deleteOldQueuedWorkflows = 0 0 * * * ?
 # Age limit for queued workflows in hours.
 queuedWorkflowAgeLimitHours = 24
 
+# The expression below fires every day at second 0, minute 0, hour 0 (i.e. at 00:00:00, every day).
+cron.clearTmpDir = 0 0 0 * * ?
+
+# Age limit for tmp directories in days.
+tmpDirAgeLimitDays = 1
+
 #=======================
 # DB migrations
 #=======================