Skip to content

Commit

Permalink
Merge branch 'develop_aws' into add-gpu-count
Browse files Browse the repository at this point in the history
  • Loading branch information
henriqueribeiro authored Dec 22, 2023
2 parents 4703181 + 6eb71bb commit 4540048
Show file tree
Hide file tree
Showing 10 changed files with 308 additions and 118 deletions.
2 changes: 2 additions & 0 deletions backend/src/main/scala/cromwell/backend/backend.scala
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,12 @@ object CommonBackendConfigurationAttributes {
"default-runtime-attributes.docker",
"default-runtime-attributes.queueArn",
"default-runtime-attributes.awsBatchRetryAttempts",
"default-runtime-attributes.maxRetries",
"default-runtime-attributes.awsBatchEvaluateOnExit",
"default-runtime-attributes.ulimits",
"default-runtime-attributes.efsDelocalize",
"default-runtime-attributes.efsMakeMD5",
"default-runtime-attributes.tagResources",
"default-runtime-attributes.failOnStderr",
"slow-job-warning-time",
"dockerhub",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ import wom.callable.{AdHocValue, CommandTaskDefinition, ContainerizedInputExpres
import wom.expression.WomExpression
import wom.graph.LocalName
import wom.values._
import wom.types._
import wom.{CommandSetupSideEffectFile, InstantiatedCommand, WomFileMapper}

import java.net.URLDecoder
import scala.concurrent._
import scala.concurrent.duration._
import scala.util.{Failure, Success, Try}
Expand Down Expand Up @@ -165,7 +166,12 @@ trait StandardAsyncExecutionActor
if (inputsToNotLocalize.contains(womFile)) {
womFile
} else {
mapper(womFile)
// resolve
val mapped_wom = mapper(womFile)
// decode url encoded values generated by the mapper
val decoded_womValue = URLDecoder.decode(mapped_wom.valueString,"UTF-8")
// convert to womfile again
WomFile(WomSingleFileType, decoded_womValue)
}
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

package cromwell.backend.impl.aws

import java.net.SocketTimeoutException
import java.net.{SocketTimeoutException, URLDecoder}
import java.io.FileNotFoundException
import java.nio.file.Paths

Expand Down Expand Up @@ -202,7 +202,8 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar
configuration.fsxMntPoint,
configuration.efsMntPoint,
Option(runtimeAttributes.efsMakeMD5),
Option(runtimeAttributes.efsDelocalize))
Option(runtimeAttributes.efsDelocalize),
Option(runtimeAttributes.tagResources))
}

// setup batch client to query job container info
Expand Down Expand Up @@ -262,13 +263,13 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar
case Success(path: S3Path) =>
configuration.fileSystem match {
case AWSBatchStorageSystems.s3 =>
path.pathWithoutScheme
URLDecoder.decode(path.pathWithoutScheme,"UTF-8")
case _ =>
path.toString
URLDecoder.decode(path.toString,"UTF-8")
}
// non-s3 paths
case _ =>
value
URLDecoder.decode(value,"UTF-8")
}
)
}
Expand Down Expand Up @@ -421,7 +422,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar
AwsBatchFileOutput(makeSafeAwsBatchReferenceName(womFile.value), womFile.value, relpath, disk)
} else {
// if efs is not enabled, OR efs delocalization IS enabled, keep the s3 path as destination.
AwsBatchFileOutput(makeSafeAwsBatchReferenceName(womFile.value), destination, relpath, disk)
AwsBatchFileOutput(makeSafeAwsBatchReferenceName(womFile.value), URLDecoder.decode(destination,"UTF-8"), relpath, disk)
}
List(output)
}
Expand Down Expand Up @@ -689,7 +690,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar
// STATUS LOGIC:
// - success : container exit code is zero
// - command failure: container exit code > 0, no statusReason in container
// - OOM kill : container exit code > 0, statusReason contains "OutOfMemory"
// - OOM kill : container exit code > 0, statusReason contains "OutOfMemory" OR exit code == 137
// - spot kill : no container exit code set. statusReason of ATTEMPT (not container) says "host EC2 (...) terminated"
Log.debug(s"Looking for memoryRetry in job '${job.jobId}'")
val describeJobsResponse = batchClient.describeJobs(DescribeJobsRequest.builder.jobs(job.jobId).build)
Expand Down Expand Up @@ -722,6 +723,9 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar
case "0" =>
Log.debug("container exit code was zero. job succeeded")
false
case "137" =>
Log.info("Job failed with Container status reason : 'OutOfMemory' (code:137)")
true
case _ =>
// failed job due to command errors (~ user errors) don't have a container exit reason.
val containerStatusReason:String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ case class AwsBatchAttributes(fileSystem: String,
fsxMntPoint: Option[List[String]],
efsMntPoint: Option[String],
efsMakeMD5: Option[Boolean],
tagResources: Option[Boolean],
efsDelocalize: Option[Boolean],
globLinkCommand: Option[String],
checkSiblingMd5: Option[Boolean]
Expand Down Expand Up @@ -92,6 +93,8 @@ object AwsBatchAttributes {
"gpuCount",
"efsDelocalize",
"efsMakeMD5",
"tagResources",
"maxRetries",
"glob-link-command"
)

Expand Down Expand Up @@ -182,6 +185,13 @@ object AwsBatchAttributes {
case false => None
}
}
// from config if set:
val tagResources:ErrorOr[Option[Boolean]] = validate {
backendConfig.hasPath("default-runtime-attributes.tagResources") match {
case true => Some(backendConfig.getBoolean("default-runtime-attributes.tagResources"))
case false => None
}
}
// from config if set.
val globLinkCommand:ErrorOr[Option[String]] = validate {
backendConfig.hasPath("glob-link-command") match {
Expand All @@ -208,6 +218,7 @@ object AwsBatchAttributes {
efsMntPoint,
efsMakeMD5,
efsDelocalize,
tagResources,
globLinkCommand,
checkSiblingMd5
).tupled.map((AwsBatchAttributes.apply _).tupled) match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDes
val efsMntPoint = batchAttributes.efsMntPoint
val efsMakeMD5 = batchAttributes.efsMakeMD5
val efsDelocalize = batchAttributes.efsDelocalize
val tagResources = batchAttributes.tagResources
val globLinkCommand = batchAttributes.globLinkCommand
val checkSiblingMd5 = batchAttributes.checkSiblingMd5
}
Expand Down
Loading

0 comments on commit 4540048

Please sign in to comment.