Qualification Tool: add test for SQL description escaping commas for CSV #6120

Merged: 1 commit, Jul 27, 2022
@@ -108,15 +108,19 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
 
   def csvDetailedHeader(ind: Int) = csvDetailedFields(ind)._1
 
-  override protected def beforeEach(): Unit = {
-    TrampolineUtil.cleanupAnyExistingSession()
+  private def createSparkSession(): Unit = {
     sparkSession = SparkSession
       .builder()
       .master("local[*]")
       .appName("Rapids Spark Profiling Tool Unit Tests")
       .getOrCreate()
   }
+
+  override protected def beforeEach(): Unit = {
+    TrampolineUtil.cleanupAnyExistingSession()
+    createSparkSession()
+  }
 
   def readExpectedFile(expected: File): DataFrame = {
     ToolTestUtils.readExpectationCSV(sparkSession, expected.getPath(),
       Some(schema))
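
For context on the hunk above: running the qualification tool ends up stopping the active SparkSession, so the test needs a way to rebuild one before it can read the tool's CSV output back in. Extracting the builder into createSparkSession() makes it reusable from both beforeEach and the test body. Below is a minimal, self-contained sketch of that pattern; the helper and field names mirror the diff, while the stop() call merely stands in for the tool run:

import org.apache.spark.sql.SparkSession

object SessionRecycleSketch {
  private var sparkSession: SparkSession = _

  // Same shape as the helper extracted in the diff above
  private def createSparkSession(): Unit = {
    sparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("Rapids Spark Profiling Tool Unit Tests")
      .getOrCreate()
  }

  def main(args: Array[String]): Unit = {
    createSparkSession()
    sparkSession.stop()   // stands in for the tool run that stops the session
    createSparkSession()  // cheap to rebuild now that the builder is factored out
    println(s"new session up, Spark ${sparkSession.version}")
    sparkSession.stop()
  }
}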
@@ -656,6 +660,7 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
         spark.sparkContext.addSparkListener(listener)
         import spark.implicits._
         val testData = Seq((1, 2), (3, 4)).toDF("a", "b")
+        spark.sparkContext.setJobDescription("testing, csv delimiter; replacement")
         testData.createOrReplaceTempView("t1")
         testData.createOrReplaceTempView("t2")
         spark.sql("SELECT a, MAX(b) FROM (SELECT t1.a, t2.b " +
@@ -667,13 +672,30 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
       // run the qualification tool
       TrampolineUtil.withTempDir { outpath =>
         val appArgs = new QualificationArgs(Array(
+          "--per-sql",
           "--output-directory",
           outpath.getAbsolutePath,
           eventLog))
 
         val (exit, sumInfo) =
           QualificationMain.mainInternal(appArgs)
         assert(exit == 0)
+        // the code above that runs the Spark query stops the SparkSession,
+        // so create a new one to read in the csv file
+        createSparkSession()
+
+        // validate that the SQL description in the csv file escapes commas properly
+        val persqlResults = s"$outpath/rapids_4_spark_qualification_output/" +
+          s"rapids_4_spark_qualification_output_persql.csv"
+        val dfPerSqlActual = readPerSqlFile(new File(persqlResults))
+        // the column count alone won't catch a malformed sql description,
+        // because spark seems to drop the extra columns, so check the row contents too
+        assert(dfPerSqlActual.columns.size == 10)
+        val rows = dfPerSqlActual.collect()
+        assert(rows.size == 3)
+        val firstRow = rows(1)
+        // commas in the description should have been replaced with semicolons
+        assert(firstRow(3) == "testing; csv delimiter; replacement")
+
         // parse results from listener
         val executorCpuTime = listener.executorCpuTime
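
What the new assertions pin down, in isolation: the qualification tool writes free-text fields such as the SQL description into a comma-delimited file, so any commas inside the text must be substituted before the row is written, or the row would gain extra columns. A minimal sketch of that behavior, assuming a plain replace-with-semicolon scheme; the replaceDelimiter name here is illustrative, and the tool's actual CSV writer may differ:

object DelimiterEscapeSketch {
  // Hypothetical stand-in for the escaping the qualification tool applies
  // to free-text CSV fields; assumes commas are simply replaced with semicolons
  def replaceDelimiter(field: String, delimiter: String = ","): String =
    field.replace(delimiter, ";")

  def main(args: Array[String]): Unit = {
    val desc = "testing, csv delimiter; replacement"
    val escaped = replaceDelimiter(desc)
    // Matches the cell value asserted in the test above
    assert(escaped == "testing; csv delimiter; replacement")
    println(escaped)
  }
}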