Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
vkorukanti committed Sep 5, 2024
1 parent 07714b4 commit 97ea76b
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .github/workflows/spark_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ jobs:
pipenv run pip install pyarrow==8.0.0
pipenv run pip install numpy==1.20.3
if: steps.git-diff.outputs.diff
- name: Build Spark 3.5.4-SNAPSHOT locally
run: python3 build/generate_spark_jars.py

- name: Run Scala/Java and Python tests
# when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_master_test.yaml
run: |
Expand Down
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ val all_scala_versions = Seq(scala212, scala213)
val default_scala_version = settingKey[String]("Default Scala version")
Global / default_scala_version := scala212

val LATEST_RELEASED_SPARK_VERSION = "3.5.3"
val LATEST_RELEASED_SPARK_VERSION = "3.5.4-SNAPSHOT"
val SPARK_MASTER_VERSION = "4.0.0-SNAPSHOT"
val sparkVersion = settingKey[String]("Spark version")
spark / sparkVersion := getSparkVersion()
Expand Down Expand Up @@ -174,6 +174,7 @@ def crossSparkSettings(): Seq[Setting[_]] = getSparkVersion() match {
// For adding staged Spark RC versions, e.g.:
// resolvers += "Apache Spark 3.5.0 (RC1) Staging" at "https://repository.apache.org/content/repositories/orgapachespark-1444/",
resolvers += "Apache Spark 3.5.3 (RC1) Staging" at "https://repository.apache.org/content/repositories/orgapachespark-1464/",
// resolvers += "Spark 3.5.4 staging" at "https://repository.apache.org/content/groups/snapshots/",
Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / "scala-spark-3.5",
Test / unmanagedSourceDirectories += (Test / baseDirectory).value / "src" / "test" / "scala-spark-3.5",
Antlr4 / antlr4Version := "4.9.3",
Expand Down
79 changes: 79 additions & 0 deletions build/generate_spark_jars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python3

#
# Copyright (2021) The Delta Lake Project Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
import glob
import subprocess
import shlex
import shutil
from os import path

def clone_and_build_spark():
    """Shallow-clone Apache Spark's branch-3.5 and build/install its artifacts locally.

    Clones into a ``.temp_spark`` directory in the current working directory,
    then runs the Maven build there with tests skipped. Side effects: changes
    the process's working directory and sets the MAVEN_OPTS environment variable.
    """
    branch = "branch-3.5"

    print(f"Cloning Apache Spark repository ({branch})...")
    clone_cmd = (
        f"git clone --depth 1 --branch {branch} "
        "https://github.com/apache/spark.git .temp_spark"
    )
    run_cmd(clone_cmd)

    # Run the build from inside the freshly cloned tree.
    os.chdir(".temp_spark")

    # Give the Maven JVM enough stack/heap/code cache for the Spark build.
    maven_opts = "-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
    os.environ["MAVEN_OPTS"] = maven_opts
    print(f"Set MAVEN_OPTS to: {maven_opts}")

    # Build and install the Spark JARs into the local Maven repository.
    print("Building Spark JAR files...")
    run_cmd("./build/mvn -DskipTests clean package install")

    print("Build completed successfully!")

def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs):
    """Run *cmd* as a subprocess and report its result.

    *cmd* may be a single shell-style string (split with shlex) or an argv
    list. Entries in *env* are layered on top of a copy of the current
    process environment. Extra keyword arguments are forwarded to
    ``subprocess.Popen``.

    With ``stream_output=True`` the child inherits this process's
    stdout/stderr and only the exit code is returned; otherwise both streams
    are captured and ``(exit_code, stdout, stderr)`` is returned, with
    stdout/stderr as raw bytes.

    Raises ``Exception`` on a non-zero exit code unless *throw_on_error*
    is False.
    """
    argv = shlex.split(cmd) if isinstance(cmd, str) else cmd

    merged_env = os.environ.copy()
    if env:
        merged_env.update(env)

    if stream_output:
        proc = subprocess.Popen(argv, env=merged_env, **kwargs)
        rc = proc.wait()
        if throw_on_error and rc != 0:
            raise Exception("Non-zero exitcode: %s" % (rc))
        print("----\n")
        return rc

    proc = subprocess.Popen(
        argv,
        env=merged_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        **kwargs)
    out, err = proc.communicate()
    rc = proc.wait()
    if throw_on_error and rc != 0:
        raise Exception(
            "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" %
            (rc, out, err))
    return (rc, out, err)

# Script entry point: clone and build Spark only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    clone_and_build_spark()

0 comments on commit 97ea76b

Please sign in to comment.