-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-20043][ML] DecisionTreeModel: ImpurityCalculator builder fails for uppercase impurity type Gini #17407
Changes from 2 commits
b554fce
80f3306
276cac6
55fe603
5e0fbe3
22ee03d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -385,6 +385,20 @@ class DecisionTreeClassifierSuite | |
testEstimatorAndModelReadWrite(dt, continuousData, allParamSettings ++ Map("maxDepth" -> 0), | ||
allParamSettings ++ Map("maxDepth" -> 0), checkModelData) | ||
} | ||
|
||
test("read/write: ImpurityCalculator builder did not recognize impurity type: Gini") { | ||
val rdd = TreeTests.getTreeReadWriteData(sc) | ||
|
||
val categoricalData: DataFrame = | ||
TreeTests.setMetadata(rdd, Map(0 -> 2, 1 -> 3), numClasses = 2) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To simplify this, you can write There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done. |
||
|
||
// BUG: see SPARK-20043 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd put the JIRA number in the test title. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed the comment. |
||
val dt = new DecisionTreeClassifier().setImpurity("Gini") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To make this faster, set maxDepth = 2 (something small) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set maxDepth = 2. |
||
|
||
val model = dt.fit(categoricalData) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The blank lines kinda stand out. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Deleted some blank lines to keep the test compact. |
||
testDefaultReadWrite(model, false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about setting testParams=true for this case? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done. |
||
} | ||
} | ||
|
||
private[ml] object DecisionTreeClassifierSuite extends SparkFunSuite { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -178,6 +178,20 @@ class DecisionTreeRegressorSuite | |
TreeTests.allParamSettings ++ Map("maxDepth" -> 0), | ||
TreeTests.allParamSettings ++ Map("maxDepth" -> 0), checkModelData) | ||
} | ||
|
||
test("read/write: ImpurityCalculator builder did not recognize impurity type: Variance") { | ||
val rdd = TreeTests.getTreeReadWriteData(sc) | ||
|
||
val continuousData: DataFrame = | ||
TreeTests.setMetadata(rdd, Map.empty[Int, Int], numClasses = 0) | ||
|
||
// BUG: see SPARK-20043 | ||
val dt = new DecisionTreeRegressor().setImpurity("Variance") | ||
|
||
val model = dt.fit(continuousData) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the second unit test seems redundant for this case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed. |
||
testDefaultReadWrite(model, false) | ||
} | ||
} | ||
|
||
private[ml] object DecisionTreeRegressorSuite extends SparkFunSuite { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be more specific, how about:
"SPARK-20043: ImpurityCalculator builder fails for uppercase impurity type Gini in model read/write"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.