Skip to content

Commit

Permalink
[SPARK-11988][ML][MLLIB] Update JPMML to 1.2.7
Browse files Browse the repository at this point in the history
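Update JPMML pmml-model to 1.2.7, and migrate the MLlib PMML export code from the `withXxx(...)` builder calls to the corresponding `setXxx(...)` / `addXxx(...)` calls.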

Author: Sean Owen <sowen@cloudera.com>

Closes apache#9972 from srowen/SPARK-11988.
  • Loading branch information
srowen committed Dec 5, 2015
1 parent e9c9ae2 commit 7da6748
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 65 deletions.
3 changes: 1 addition & 2 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Expand Down Expand Up @@ -237,7 +236,7 @@ The following components are provided under a BSD-style license. See project lin
The text of each license is also included at licenses/LICENSE-[project].txt.

(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
Expand Down
2 changes: 1 addition & 1 deletion mllib/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
<dependency>
<groupId>org.jpmml</groupId>
<artifactId>pmml-model</artifactId>
<version>1.1.15</version>
<version>1.2.7</version>
<exclusions>
<exclusion>
<groupId>com.sun.xml.fastinfoset</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
val fields = new SArray[FieldName](model.weights.size)
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
val regressionTableYES = new RegressionTable(model.intercept).withTargetCategory("1")
val regressionTableYES = new RegressionTable(model.intercept).setTargetCategory("1")
var interceptNO = threshold
if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
if (threshold <= 0) {
Expand All @@ -56,35 +56,35 @@ private[mllib] class BinaryClassificationPMMLModelExport(
interceptNO = -math.log(1 / threshold - 1)
}
}
val regressionTableNO = new RegressionTable(interceptNO).withTargetCategory("0")
val regressionTableNO = new RegressionTable(interceptNO).setTargetCategory("0")
val regressionModel = new RegressionModel()
.withFunctionName(MiningFunctionType.CLASSIFICATION)
.withMiningSchema(miningSchema)
.withModelName(description)
.withNormalizationMethod(normalizationMethod)
.withRegressionTables(regressionTableYES, regressionTableNO)
.setFunctionName(MiningFunctionType.CLASSIFICATION)
.setMiningSchema(miningSchema)
.setModelName(description)
.setNormalizationMethod(normalizationMethod)
.addRegressionTables(regressionTableYES, regressionTableNO)

for (i <- 0 until model.weights.size) {
fields(i) = FieldName.create("field_" + i)
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.withMiningFields(new MiningField(fields(i))
.withUsageType(FieldUsageType.ACTIVE))
regressionTableYES.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
.addMiningFields(new MiningField(fields(i))
.setUsageType(FieldUsageType.ACTIVE))
regressionTableYES.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
}

// add target field
val targetField = FieldName.create("target")
dataDictionary
.withDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
.addDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
miningSchema
.withMiningFields(new MiningField(targetField)
.withUsageType(FieldUsageType.TARGET))
.addMiningFields(new MiningField(targetField)
.setUsageType(FieldUsageType.TARGET))

dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)

pmml.setDataDictionary(dataDictionary)
pmml.withModels(regressionModel)
pmml.addModels(regressionModel)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,31 +45,31 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
val miningSchema = new MiningSchema
val regressionTable = new RegressionTable(model.intercept)
val regressionModel = new RegressionModel()
.withFunctionName(MiningFunctionType.REGRESSION)
.withMiningSchema(miningSchema)
.withModelName(description)
.withRegressionTables(regressionTable)
.setFunctionName(MiningFunctionType.REGRESSION)
.setMiningSchema(miningSchema)
.setModelName(description)
.addRegressionTables(regressionTable)

for (i <- 0 until model.weights.size) {
fields(i) = FieldName.create("field_" + i)
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.withMiningFields(new MiningField(fields(i))
.withUsageType(FieldUsageType.ACTIVE))
regressionTable.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
.addMiningFields(new MiningField(fields(i))
.setUsageType(FieldUsageType.ACTIVE))
regressionTable.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
}

// for completeness add target field
val targetField = FieldName.create("target")
dataDictionary.withDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
dataDictionary.addDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.withMiningFields(new MiningField(targetField)
.withUsageType(FieldUsageType.TARGET))
.addMiningFields(new MiningField(targetField)
.setUsageType(FieldUsageType.TARGET))

dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)

pmml.setDataDictionary(dataDictionary)
pmml.withModels(regressionModel)
pmml.addModels(regressionModel)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,42 +42,42 @@ private[mllib] class KMeansPMMLModelExport(model : KMeansModel) extends PMMLMode
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
val comparisonMeasure = new ComparisonMeasure()
.withKind(ComparisonMeasure.Kind.DISTANCE)
.withMeasure(new SquaredEuclidean())
.setKind(ComparisonMeasure.Kind.DISTANCE)
.setMeasure(new SquaredEuclidean())
val clusteringModel = new ClusteringModel()
.withModelName("k-means")
.withMiningSchema(miningSchema)
.withComparisonMeasure(comparisonMeasure)
.withFunctionName(MiningFunctionType.CLUSTERING)
.withModelClass(ClusteringModel.ModelClass.CENTER_BASED)
.withNumberOfClusters(model.clusterCenters.length)
.setModelName("k-means")
.setMiningSchema(miningSchema)
.setComparisonMeasure(comparisonMeasure)
.setFunctionName(MiningFunctionType.CLUSTERING)
.setModelClass(ClusteringModel.ModelClass.CENTER_BASED)
.setNumberOfClusters(model.clusterCenters.length)

for (i <- 0 until clusterCenter.size) {
fields(i) = FieldName.create("field_" + i)
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.withMiningFields(new MiningField(fields(i))
.withUsageType(FieldUsageType.ACTIVE))
clusteringModel.withClusteringFields(
new ClusteringField(fields(i)).withCompareFunction(CompareFunctionType.ABS_DIFF))
.addMiningFields(new MiningField(fields(i))
.setUsageType(FieldUsageType.ACTIVE))
clusteringModel.addClusteringFields(
new ClusteringField(fields(i)).setCompareFunction(CompareFunctionType.ABS_DIFF))
}

dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)

for (i <- 0 until model.clusterCenters.length) {
for (i <- model.clusterCenters.indices) {
val cluster = new Cluster()
.withName("cluster_" + i)
.withArray(new org.dmg.pmml.Array()
.withType(Array.Type.REAL)
.withN(clusterCenter.size)
.withValue(model.clusterCenters(i).toArray.mkString(" ")))
.setName("cluster_" + i)
.setArray(new org.dmg.pmml.Array()
.setType(Array.Type.REAL)
.setN(clusterCenter.size)
.setValue(model.clusterCenters(i).toArray.mkString(" ")))
// we don't have the size of each individual cluster, only its centroid (set via setValue),
// so the cluster size is left unset: .setSize(value)
clusteringModel.withClusters(cluster)
clusteringModel.addClusters(cluster)
}

pmml.setDataDictionary(dataDictionary)
pmml.withModels(clusteringModel)
pmml.addModels(clusteringModel)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,14 @@ private[mllib] trait PMMLModelExport {
* Holder of the exported model in PMML format
*/
@BeanProperty
val pmml: PMML = new PMML

pmml.setVersion("4.2")
setHeader(pmml)

private def setHeader(pmml: PMML): Unit = {
val pmml: PMML = {
val version = getClass.getPackage.getImplementationVersion
val app = new Application().withName("Apache Spark MLlib").withVersion(version)
val app = new Application("Apache Spark MLlib").setVersion(version)
val timestamp = new Timestamp()
.withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
.addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
val header = new Header()
.withApplication(app)
.withTimestamp(timestamp)
pmml.setHeader(header)
.setApplication(app)
.setTimestamp(timestamp)
new PMML("4.2", header, null)
}
}

0 comments on commit 7da6748

Please sign in to comment.