-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-45506][CONNECT] Add ivy URI support to SparkConnect addArtifact #43354
Changes from 4 commits
291dc8c
ac5e48a
d39199e
4cc803a
18fa50c
587ae1f
e6bc941
17ed7a1
85837d4
4bcd755
845738f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,20 @@ | |
<groupId>org.apache.commons</groupId> | ||
<artifactId>commons-text</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>commons-io</groupId> | ||
<artifactId>commons-io</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.ivy</groupId> | ||
<artifactId>ivy</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>oro</groupId> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW oro is pretty much dead. Why does ivy need it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. glob matchers ( Also if I remove this dep then tests start failing:
|
||
<!-- oro is needed by ivy, but only listed as an optional dependency, so we include it. --> | ||
<artifactId>oro</artifactId> | ||
<version>${oro.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-api</artifactId> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,30 +15,26 @@ | |
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.deploy | ||
package org.apache.spark.util | ||
|
||
import java.io.{File, FileInputStream, FileOutputStream} | ||
import java.util.jar.{JarEntry, JarOutputStream} | ||
import java.util.jar.{JarEntry, JarOutputStream, Manifest} | ||
import java.util.jar.Attributes.Name | ||
import java.util.jar.Manifest | ||
|
||
import scala.collection.mutable.ArrayBuffer | ||
|
||
import com.google.common.io.{ByteStreams, Files} | ||
import org.apache.commons.io.FileUtils | ||
import org.apache.ivy.core.settings.IvySettings | ||
|
||
import org.apache.spark.TestUtils.{createCompiledClass, JavaSourceFromString} | ||
import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate | ||
import org.apache.spark.util.Utils | ||
import org.apache.spark.util.MavenUtils.MavenCoordinate | ||
|
||
private[deploy] object IvyTestUtils { | ||
private[spark] object IvyTestUtils { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this file should be in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
|
||
/** | ||
* Create the path for the jar and pom from the maven coordinate. Extension should be `jar` | ||
* or `pom`. | ||
* Create the path for the jar and pom from the maven coordinate. Extension should be `jar` or | ||
* `pom`. | ||
*/ | ||
private[deploy] def pathFromCoordinate( | ||
private[spark] def pathFromCoordinate( | ||
artifact: MavenCoordinate, | ||
prefix: File, | ||
ext: String, | ||
|
@@ -55,7 +51,7 @@ private[deploy] object IvyTestUtils { | |
} | ||
|
||
/** Returns the artifact naming based on standard ivy or maven format. */ | ||
private[deploy] def artifactName( | ||
private[spark] def artifactName( | ||
artifact: MavenCoordinate, | ||
useIvyLayout: Boolean, | ||
ext: String = ".jar"): String = { | ||
|
@@ -76,7 +72,7 @@ private[deploy] object IvyTestUtils { | |
} | ||
|
||
/** Write the contents to a file to the supplied directory. */ | ||
private[deploy] def writeFile(dir: File, fileName: String, contents: String): File = { | ||
private[spark] def writeFile(dir: File, fileName: String, contents: String): File = { | ||
val outputFile = new File(dir, fileName) | ||
val outputStream = new FileOutputStream(outputFile) | ||
outputStream.write(contents.toCharArray.map(_.toByte)) | ||
|
@@ -99,7 +95,7 @@ private[deploy] object IvyTestUtils { | |
className: String, | ||
packageName: String): Seq[(String, File)] = { | ||
val rFilesDir = new File(dir, "R" + File.separator + "pkg") | ||
Files.createParentDirs(new File(rFilesDir, "R" + File.separator + "mylib.R")) | ||
new File(rFilesDir, "R").mkdirs() | ||
val contents = | ||
s"""myfunc <- function(x) { | ||
| SparkR:::callJStatic("$packageName.$className", "myFunc", x) | ||
|
@@ -126,7 +122,10 @@ private[deploy] object IvyTestUtils { | |
|export("myfunc") | ||
""".stripMargin | ||
val nameFile = writeFile(rFilesDir, "NAMESPACE", namespace) | ||
Seq(("R/pkg/R/mylib.R", source), ("R/pkg/DESCRIPTION", descFile), ("R/pkg/NAMESPACE", nameFile)) | ||
Seq( | ||
("R/pkg/R/mylib.R", source), | ||
("R/pkg/DESCRIPTION", descFile), | ||
("R/pkg/NAMESPACE", nameFile)) | ||
} | ||
|
||
/** Create a simple testable Class. */ | ||
|
@@ -143,8 +142,8 @@ private[deploy] object IvyTestUtils { | |
|} | ||
""".stripMargin | ||
val sourceFile = | ||
new JavaSourceFromString(new File(dir, className).toURI.getPath, contents) | ||
createCompiledClass(className, dir, sourceFile, Seq.empty) | ||
new SparkTestUtils.JavaSourceFromString(new File(dir, className).toURI.getPath, contents) | ||
SparkTestUtils.createCompiledClass(className, dir, sourceFile, Seq.empty) | ||
} | ||
|
||
private def createDescriptor( | ||
|
@@ -154,11 +153,11 @@ private[deploy] object IvyTestUtils { | |
useIvyLayout: Boolean): File = { | ||
if (useIvyLayout) { | ||
val ivyXmlPath = pathFromCoordinate(artifact, tempPath, "ivy", true) | ||
Files.createParentDirs(new File(ivyXmlPath, "dummy")) | ||
ivyXmlPath.mkdirs() | ||
createIvyDescriptor(ivyXmlPath, artifact, dependencies) | ||
} else { | ||
val pomPath = pathFromCoordinate(artifact, tempPath, "pom", useIvyLayout) | ||
Files.createParentDirs(new File(pomPath, "dummy")) | ||
pomPath.mkdirs() | ||
createPom(pomPath, artifact, dependencies) | ||
} | ||
} | ||
|
@@ -185,12 +184,16 @@ private[deploy] object IvyTestUtils { | |
| <modelVersion>4.0.0</modelVersion> | ||
""".stripMargin.trim | ||
content += pomArtifactWriter(artifact) | ||
content += dependencies.map { deps => | ||
val inside = deps.map { dep => | ||
"\t<dependency>" + pomArtifactWriter(dep, 3) + "\n\t</dependency>" | ||
}.mkString("\n") | ||
"\n <dependencies>\n" + inside + "\n </dependencies>" | ||
}.getOrElse("") | ||
content += dependencies | ||
.map { deps => | ||
val inside = deps | ||
.map { dep => | ||
"\t<dependency>" + pomArtifactWriter(dep, 3) + "\n\t</dependency>" | ||
} | ||
.mkString("\n") | ||
"\n <dependencies>\n" + inside + "\n </dependencies>" | ||
} | ||
.getOrElse("") | ||
content += "\n</project>" | ||
writeFile(dir, artifactName(artifact, false, ".pom"), content.trim) | ||
} | ||
|
@@ -226,16 +229,18 @@ private[deploy] object IvyTestUtils { | |
| conf="master"/> | ||
| </publications> | ||
""".stripMargin.trim | ||
content += dependencies.map { deps => | ||
val inside = deps.map(ivyArtifactWriter).mkString("\n") | ||
"\n <dependencies>\n" + inside + "\n </dependencies>" | ||
}.getOrElse("") | ||
content += dependencies | ||
.map { deps => | ||
val inside = deps.map(ivyArtifactWriter).mkString("\n") | ||
"\n <dependencies>\n" + inside + "\n </dependencies>" | ||
} | ||
.getOrElse("") | ||
content += "\n</ivy-module>" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to delete this line? It will make the content of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Give a fix: #43834 |
||
writeFile(dir, "ivy.xml", content.trim) | ||
} | ||
|
||
/** Create the jar for the given maven coordinate, using the supplied files. */ | ||
private[deploy] def packJar( | ||
private[spark] def packJar( | ||
dir: File, | ||
artifact: MavenCoordinate, | ||
files: Seq[(String, File)], | ||
|
@@ -266,7 +271,7 @@ private[deploy] object IvyTestUtils { | |
jarStream.putNextEntry(jarEntry) | ||
|
||
val in = new FileInputStream(file._2) | ||
ByteStreams.copy(in, jarStream) | ||
SparkStreamUtils.copyStream(in, jarStream) | ||
in.close() | ||
} | ||
jarStream.close() | ||
|
@@ -277,15 +282,21 @@ private[deploy] object IvyTestUtils { | |
|
||
/** | ||
* Creates a jar and pom file, mocking a Maven repository. The root path can be supplied with | ||
* `tempDir`, dependencies can be created into the same repo, and python files can also be packed | ||
* inside the jar. | ||
* `tempDir`, dependencies can be created into the same repo, and python files can also be | ||
* packed inside the jar. | ||
* | ||
* @param artifact The maven coordinate to generate the jar and pom for. | ||
* @param dependencies List of dependencies this artifact might have to also create jars and poms. | ||
* @param tempDir The root folder of the repository | ||
* @param useIvyLayout whether to mock the Ivy layout for local repository testing | ||
* @param withPython Whether to pack python files inside the jar for extensive testing. | ||
* @return Root path of the repository | ||
* @param artifact | ||
* The maven coordinate to generate the jar and pom for. | ||
* @param dependencies | ||
* List of dependencies this artifact might have to also create jars and poms. | ||
* @param tempDir | ||
* The root folder of the repository | ||
* @param useIvyLayout | ||
* whether to mock the Ivy layout for local repository testing | ||
* @param withPython | ||
* Whether to pack python files inside the jar for extensive testing. | ||
* @return | ||
* Root path of the repository | ||
*/ | ||
private def createLocalRepository( | ||
artifact: MavenCoordinate, | ||
|
@@ -295,15 +306,15 @@ private[deploy] object IvyTestUtils { | |
withPython: Boolean = false, | ||
withR: Boolean = false): File = { | ||
// Where the root of the repository exists, and what Ivy will search in | ||
val tempPath = tempDir.getOrElse(Utils.createTempDir()) | ||
val tempPath = tempDir.getOrElse(SparkFileUtils.createTempDir()) | ||
// Create directory if it doesn't exist | ||
Files.createParentDirs(tempPath) | ||
tempPath.mkdirs() | ||
// Where to create temporary class files and such | ||
val root = new File(tempPath, tempPath.hashCode().toString) | ||
Files.createParentDirs(new File(root, "dummy")) | ||
root.mkdirs() | ||
try { | ||
val jarPath = pathFromCoordinate(artifact, tempPath, "jar", useIvyLayout) | ||
Files.createParentDirs(new File(jarPath, "dummy")) | ||
jarPath.mkdirs() | ||
val className = "MyLib" | ||
|
||
val javaClass = createJavaClass(root, className, artifact.groupId) | ||
|
@@ -330,58 +341,78 @@ private[deploy] object IvyTestUtils { | |
|
||
/** | ||
* Creates a suite of jars and poms, with or without dependencies, mocking a maven repository. | ||
* @param artifact The main maven coordinate to generate the jar and pom for. | ||
* @param dependencies List of dependencies this artifact might have to also create jars and poms. | ||
* @param rootDir The root folder of the repository (like `~/.m2/repositories`) | ||
* @param useIvyLayout whether to mock the Ivy layout for local repository testing | ||
* @param withPython Whether to pack python files inside the jar for extensive testing. | ||
* @return Root path of the repository. Will be `rootDir` if supplied. | ||
* @param artifact | ||
* The main maven coordinate to generate the jar and pom for. | ||
* @param dependencies | ||
* List of dependencies this artifact might have to also create jars and poms. | ||
* @param rootDir | ||
* The root folder of the repository (like `~/.m2/repositories`) | ||
* @param useIvyLayout | ||
* whether to mock the Ivy layout for local repository testing | ||
* @param withPython | ||
* Whether to pack python files inside the jar for extensive testing. | ||
* @return | ||
* Root path of the repository. Will be `rootDir` if supplied. | ||
*/ | ||
private[deploy] def createLocalRepositoryForTests( | ||
private[spark] def createLocalRepositoryForTests( | ||
artifact: MavenCoordinate, | ||
dependencies: Option[String], | ||
rootDir: Option[File], | ||
useIvyLayout: Boolean = false, | ||
withPython: Boolean = false, | ||
withR: Boolean = false): File = { | ||
val deps = dependencies.map(SparkSubmitUtils.extractMavenCoordinates) | ||
val deps = dependencies.map(MavenUtils.extractMavenCoordinates) | ||
val mainRepo = createLocalRepository(artifact, deps, rootDir, useIvyLayout, withPython, withR) | ||
deps.foreach { seq => seq.foreach { dep => | ||
createLocalRepository(dep, None, Some(mainRepo), useIvyLayout, withPython = false) | ||
}} | ||
deps.foreach { seq => | ||
seq.foreach { dep => | ||
createLocalRepository(dep, None, Some(mainRepo), useIvyLayout, withPython = false) | ||
} | ||
} | ||
mainRepo | ||
} | ||
|
||
/** | ||
* Creates a repository for a test, and cleans it up afterwards. | ||
* | ||
* @param artifact The main maven coordinate to generate the jar and pom for. | ||
* @param dependencies List of dependencies this artifact might have to also create jars and poms. | ||
* @param rootDir The root folder of the repository (like `~/.m2/repositories`) | ||
* @param useIvyLayout whether to mock the Ivy layout for local repository testing | ||
* @param withPython Whether to pack python files inside the jar for extensive testing. | ||
* @return Root path of the repository. Will be `rootDir` if supplied. | ||
* @param artifact | ||
* The main maven coordinate to generate the jar and pom for. | ||
* @param dependencies | ||
* List of dependencies this artifact might have to also create jars and poms. | ||
* @param rootDir | ||
* The root folder of the repository (like `~/.m2/repositories`) | ||
* @param useIvyLayout | ||
* whether to mock the Ivy layout for local repository testing | ||
* @param withPython | ||
* Whether to pack python files inside the jar for extensive testing. | ||
* @return | ||
* Root path of the repository. Will be `rootDir` if supplied. | ||
*/ | ||
private[deploy] def withRepository( | ||
private[spark] def withRepository( | ||
artifact: MavenCoordinate, | ||
dependencies: Option[String], | ||
rootDir: Option[File], | ||
useIvyLayout: Boolean = false, | ||
withPython: Boolean = false, | ||
withR: Boolean = false, | ||
ivySettings: IvySettings = new IvySettings)(f: String => Unit): Unit = { | ||
val deps = dependencies.map(SparkSubmitUtils.extractMavenCoordinates) | ||
val deps = dependencies.map(MavenUtils.extractMavenCoordinates) | ||
purgeLocalIvyCache(artifact, deps, ivySettings) | ||
val repo = createLocalRepositoryForTests(artifact, dependencies, rootDir, useIvyLayout, | ||
vsevolodstep-db marked this conversation as resolved.
Show resolved
Hide resolved
|
||
withPython, withR) | ||
val repo = createLocalRepositoryForTests( | ||
artifact, | ||
dependencies, | ||
rootDir, | ||
useIvyLayout, | ||
withPython, | ||
withR) | ||
try { | ||
f(repo.toURI.toString) | ||
} finally { | ||
// Clean up | ||
if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) { | ||
val groupDir = getBaseGroupDirectory(artifact, useIvyLayout) | ||
FileUtils.deleteDirectory(new File(repo, groupDir + File.separator + artifact.artifactId)) | ||
deps.foreach { _.foreach { dep => | ||
deps.foreach { | ||
_.foreach { dep => | ||
FileUtils.deleteDirectory(new File(repo, getBaseGroupDirectory(dep, useIvyLayout))) | ||
} | ||
} | ||
|
@@ -399,7 +430,8 @@ private[deploy] object IvyTestUtils { | |
ivySettings: IvySettings): Unit = { | ||
// delete the artifact from the cache as well if it already exists | ||
FileUtils.deleteDirectory(new File(ivySettings.getDefaultCache, artifact.groupId)) | ||
dependencies.foreach { _.foreach { dep => | ||
dependencies.foreach { | ||
_.foreach { dep => | ||
FileUtils.deleteDirectory(new File(ivySettings.getDefaultCache, dep.groupId)) | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How much deps do we add to the client by doing this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
~400 Kb from commons-io and ~1Mb from Ivy