Skip to content

Commit

Permalink
[SPARK-1455] [SPARK-3534] [Build] When possible, run SQL tests only.
Browse files Browse the repository at this point in the history
If the only files changed are related to SQL, then only run the SQL tests.

This patch includes some cosmetic/maintainability refactoring. I would be more than happy to undo some of these changes if they are inappropriate.

We can accept this patch mostly as-is and address the immediate need documented in [SPARK-3534](https://issues.apache.org/jira/browse/SPARK-3534), or we can keep it open until a satisfactory solution along the lines [discussed here](https://issues.apache.org/jira/browse/SPARK-1455?focusedCommentId=14136424&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14136424) is reached.

Note: I had to hack this patch up to test it locally, so what I'm submitting here and what I tested are technically different.

Author: Nicholas Chammas <nicholas.chammas@gmail.com>

Closes apache#2420 from nchammas/selective-testing and squashes the following commits:

db3fa2d [Nicholas Chammas] diff against master!
f9e23f6 [Nicholas Chammas] when possible, run SQL tests only
  • Loading branch information
nchammas authored and marmbrus committed Sep 17, 2014
1 parent cbf983b commit 5044e49
Showing 1 changed file with 106 additions and 50 deletions.
156 changes: 106 additions & 50 deletions dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,73 @@
FWDIR="$(cd "`dirname $0`"/..; pwd)"
cd "$FWDIR"

if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
# Remove work directory
rm -rf ./work

# Build against the right verison of Hadoop.
{
if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
fi
fi

if [ -z "$SBT_MAVEN_PROFILES_ARGS" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
if [ -z "$SBT_MAVEN_PROFILES_ARGS" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
}

export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl"

echo "SBT_MAVEN_PROFILES_ARGS=\"$SBT_MAVEN_PROFILES_ARGS\""

# Remove work directory
rm -rf ./work

if test -x "$JAVA_HOME/bin/java"; then
declare java_cmd="$JAVA_HOME/bin/java"
else
declare java_cmd=java
fi
JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run because JDK version is < 1.8."
# Determine Java path and version.
{
if test -x "$JAVA_HOME/bin/java"; then
declare java_cmd="$JAVA_HOME/bin/java"
else
declare java_cmd=java
fi

# We can't use sed -r -e due to OS X / BSD compatibility; hence, all the parentheses.
JAVA_VERSION=$(
$java_cmd -version 2>&1 \
| grep -e "^java version" --max-count=1 \
| sed "s/java version \"\(.*\)\.\(.*\)\.\(.*\)\"/\1\2/"
)

if [ "$JAVA_VERSION" -lt 18 ]; then
echo "[warn] Java 8 tests will not run because JDK version is < 1.8."
fi
}

# Partial solution for SPARK-1455. Only run Hive tests if there are sql changes.
# Only run Hive tests if there are sql changes.
# Partial solution for SPARK-1455.
if [ -n "$AMPLAB_JENKINS" ]; then
git fetch origin master:master
diffs=`git diff --name-only master | grep "^\(sql/\)\|\(bin/spark-sql\)\|\(sbin/start-thriftserver.sh\)"`
if [ -n "$diffs" ]; then
echo "Detected changes in SQL. Will run Hive test suite."

sql_diffs=$(
git diff --name-only master \
| grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
)

non_sql_diffs=$(
git diff --name-only master \
| grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
)

if [ -n "$sql_diffs" ]; then
echo "[info] Detected changes in SQL. Will run Hive test suite."
_RUN_SQL_TESTS=true

if [ -z "$non_sql_diffs" ]; then
echo "[info] Detected no changes except in SQL. Will only run SQL tests."
_SQL_TESTS_ONLY=true
fi
fi
fi

Expand All @@ -70,42 +99,69 @@ echo ""
echo "========================================================================="
echo "Running Apache RAT checks"
echo "========================================================================="
dev/check-license
./dev/check-license

echo ""
echo "========================================================================="
echo "Running Scala style checks"
echo "========================================================================="
dev/lint-scala
./dev/lint-scala

echo ""
echo "========================================================================="
echo "Running Python style checks"
echo "========================================================================="
dev/lint-python
./dev/lint-python

echo ""
echo "========================================================================="
echo "Building Spark"
echo "========================================================================="

{
# We always build with Hive because the PySpark Spark SQL tests need it.
BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"

echo "[info] Building Spark with these arguments: $BUILD_MVN_PROFILE_ARGS"

# NOTE: echo "q" is needed because sbt on encountering a build file with failure
#+ (either resolution or compilation) prompts the user for input either q, r, etc
#+ to quit or retry. This echo is there to make it not block.
# QUESTION: Why doesn't 'yes "q"' work?
# QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
echo -e "q\n" \
| sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
}

echo ""
echo "========================================================================="
echo "Running Spark unit tests"
echo "========================================================================="

# Build Spark; we always build with Hive because the PySpark Spark SQL tests need it.
# echo "q" is needed because sbt on encountering a build file with failure
# (either resolution or compilation) prompts the user for input either q, r,
# etc to quit or retry. This echo is there to make it not block.
BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive "
echo -e "q\n" | sbt/sbt $BUILD_MVN_PROFILE_ARGS clean package assembly/assembly | \
grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"

# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled:
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
fi
# echo "q" is needed because sbt on encountering a build file with failure
# (either resolution or compilation) prompts the user for input either q, r,
# etc to quit or retry. This echo is there to make it not block.
echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS test | \
grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
{
# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
fi

if [ -n "$_SQL_TESTS_ONLY" ]; then
SBT_MAVEN_TEST_ARGS="catalyst/test sql/test hive/test"
else
SBT_MAVEN_TEST_ARGS="test"
fi

echo "[info] Running Spark tests with these arguments: $SBT_MAVEN_PROFILES_ARGS $SBT_MAVEN_TEST_ARGS"

# NOTE: echo "q" is needed because sbt on encountering a build file with failure
#+ (either resolution or compilation) prompts the user for input either q, r, etc
#+ to quit or retry. This echo is there to make it not block.
# QUESTION: Why doesn't 'yes "q"' work?
# QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
echo -e "q\n" \
| sbt/sbt "$SBT_MAVEN_PROFILES_ARGS" "$SBT_MAVEN_TEST_ARGS" \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
}

echo ""
echo "========================================================================="
Expand All @@ -117,4 +173,4 @@ echo ""
echo "========================================================================="
echo "Detecting binary incompatibilites with MiMa"
echo "========================================================================="
dev/mima
./dev/mima

0 comments on commit 5044e49

Please sign in to comment.