-
Notifications
You must be signed in to change notification settings - Fork 118
Python Bindings for launching PySpark Jobs from the JVM #364
Changes from all commits
d3cf58f
bafc13c
59d9f0a
4daf634
51105ca
bd30f40
720776e
4b5f470
1361a26
0abc3b1
0869b07
38d48ce
9bf7b9d
4561194
2cf96cc
eb1079a
4a6b779
363919a
9c7adb1
0388aa4
6acab03
5499f6d
e103225
4533df2
cc289f1
c267286
8045c94
41b6b8c
923f956
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,14 +24,19 @@ private[spark] trait ContainerLocalizedFilesResolver { | |
def resolveSubmittedAndRemoteSparkJars(): Seq[String] | ||
def resolveSubmittedSparkJars(): Seq[String] | ||
def resolveSubmittedSparkFiles(): Seq[String] | ||
def resolveSubmittedPySparkFiles(): Seq[String] | ||
def resolvePrimaryResourceFile(): String | ||
} | ||
|
||
private[spark] class ContainerLocalizedFilesResolverImpl( | ||
sparkJars: Seq[String], | ||
sparkFiles: Seq[String], | ||
pySparkFiles: Seq[String], | ||
primaryPyFile: String, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be an option? |
||
jarsDownloadPath: String, | ||
filesDownloadPath: String) extends ContainerLocalizedFilesResolver { | ||
|
||
|
||
override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { | ||
sparkJars.map { jar => | ||
val jarUri = Utils.resolveURI(jar) | ||
|
@@ -53,16 +58,30 @@ private[spark] class ContainerLocalizedFilesResolverImpl( | |
resolveSubmittedFiles(sparkFiles, filesDownloadPath) | ||
} | ||
|
||
private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { | ||
files.map { file => | ||
val fileUri = Utils.resolveURI(file) | ||
Option(fileUri.getScheme).getOrElse("file") match { | ||
case "file" => | ||
val fileName = new File(fileUri.getPath).getName | ||
s"$downloadPath/$fileName" | ||
case _ => | ||
file | ||
} | ||
override def resolveSubmittedPySparkFiles(): Seq[String] = { | ||
def filterMainResource(x: String) = x match { | ||
case `primaryPyFile` => None | ||
case _ => Some(resolveFile(x, filesDownloadPath)) | ||
} | ||
pySparkFiles.flatMap(x => filterMainResource(x)) | ||
} | ||
|
||
override def resolvePrimaryResourceFile(): String = { | ||
Option(primaryPyFile).map(p => resolveFile(p, filesDownloadPath)).getOrElse("") | ||
} | ||
|
||
private def resolveFile(file: String, downloadPath: String) = { | ||
val fileUri = Utils.resolveURI(file) | ||
Option(fileUri.getScheme).getOrElse("file") match { | ||
case "file" => | ||
val fileName = new File(fileUri.getPath).getName | ||
s"$downloadPath/$fileName" | ||
case _ => | ||
file | ||
} | ||
} | ||
|
||
private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { | ||
files.map { file => resolveFile(file, downloadPath) } | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,13 +32,15 @@ import org.apache.spark.util.Utils | |
*/ | ||
private[spark] trait DriverInitContainerComponentsProvider { | ||
|
||
def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver | ||
def provideContainerLocalizedFilesResolver( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove extra space after ) |
||
mainAppResource: String): ContainerLocalizedFilesResolver | ||
def provideInitContainerSubmittedDependencyUploader( | ||
driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] | ||
def provideSubmittedDependenciesSecretBuilder( | ||
maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) | ||
: Option[SubmittedDependencySecretBuilder] | ||
def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap | ||
def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter | ||
def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], | ||
uris: Iterable[String]): Option[InitContainerBundle] | ||
} | ||
|
@@ -49,6 +51,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( | |
namespace: String, | ||
sparkJars: Seq[String], | ||
sparkFiles: Seq[String], | ||
pySparkFiles: Seq[String], | ||
resourceStagingServerExternalSslOptions: SSLOptions) | ||
extends DriverInitContainerComponentsProvider { | ||
|
||
|
@@ -104,6 +107,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( | |
private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) | ||
private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) | ||
private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) | ||
private val pySparkSubmitted = KubernetesFileUtils.getOnlySubmitterLocalFiles(pySparkFiles) | ||
|
||
private def provideInitContainerConfigMap( | ||
maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { | ||
|
@@ -130,17 +134,18 @@ private[spark] class DriverInitContainerComponentsProviderImpl( | |
} | ||
new SparkInitContainerConfigMapBuilderImpl( | ||
sparkJars, | ||
sparkFiles, | ||
sparkFiles ++ pySparkSubmitted, | ||
jarsDownloadPath, | ||
filesDownloadPath, | ||
configMapName, | ||
configMapKey, | ||
submittedDependencyConfigPlugin).build() | ||
} | ||
|
||
override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { | ||
override def provideContainerLocalizedFilesResolver(mainAppResource: String) | ||
: ContainerLocalizedFilesResolver = { | ||
new ContainerLocalizedFilesResolverImpl( | ||
sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) | ||
sparkJars, sparkFiles, pySparkFiles, mainAppResource, jarsDownloadPath, filesDownloadPath) | ||
} | ||
|
||
private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { | ||
|
@@ -159,7 +164,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( | |
namespace, | ||
stagingServerUri, | ||
sparkJars, | ||
sparkFiles, | ||
sparkFiles ++ pySparkSubmitted, | ||
resourceStagingServerExternalSslOptions, | ||
RetrofitClientFactoryImpl) | ||
} | ||
|
@@ -201,13 +206,15 @@ private[spark] class DriverInitContainerComponentsProviderImpl( | |
configMapKey, | ||
resourceStagingServerSecretPlugin) | ||
} | ||
|
||
override def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter = { | ||
new DriverPodKubernetesFileMounterImpl() | ||
} | ||
override def provideInitContainerBundle( | ||
maybeSubmittedResourceIds: Option[SubmittedResourceIds], | ||
uris: Iterable[String]): Option[InitContainerBundle] = { | ||
val containerLocalizedFilesResolver = provideContainerLocalizedFilesResolver() | ||
// Bypass init-containers if `spark.jars` and `spark.files` is empty or only has `local://` URIs | ||
if (KubernetesFileUtils.getNonContainerLocalFiles(uris).nonEmpty) { | ||
// Bypass init-containers if `spark.jars` and `spark.files` and '--py-files' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is pre-existing but why? |
||
// is empty or only has `local://` URIs | ||
if ((KubernetesFileUtils.getNonContainerLocalFiles(uris) ++ pySparkSubmitted).nonEmpty) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So right now pySparkSubmitted is defined as |
||
Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), | ||
provideInitContainerBootstrap(), | ||
provideExecutorInitContainerConfiguration())) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know this is unrelated, but so happy to have local support :)