Skip to content

Commit

Permalink
Implemented a function called fromTuple2RDD in PythonMLLibAPI and use…
Browse files Browse the repository at this point in the history
…d it to expose the MF userFeatures and productFeatures in python.
  • Loading branch information
Michelangelo D'Agostino committed Oct 20, 2014
1 parent 34cb2a2 commit 2aa1bf8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -604,4 +604,10 @@ private[spark] object SerDe extends Serializable {
/* Convert an RDD of rows into an RDD of (Int, Int) pairs, built from the
 * first two elements of each row, each cast to Int. */
def asTupleRDD(rdd: RDD[Array[Any]]): RDD[(Int, Int)] = {
  rdd.map { row =>
    val first = row(0).asInstanceOf[Int]
    val second = row(1).asInstanceOf[Int]
    (first, second)
  }
}

/* Convert RDD[Tuple2[Any, Any]] to RDD[Array[Any]]: every pair becomes a
 * two-element array holding its first and second component, in that order. */
def fromTuple2RDD(rdd: RDD[Tuple2[Any, Any]]): RDD[Array[Any]] = {
  rdd.map { pair =>
    Array(pair._1, pair._2)
  }
}

}
10 changes: 9 additions & 1 deletion python/pyspark/mllib/recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,18 @@ def predictAll(self, user_product):

def userFeatures(self):
    """
    Return the user features of the wrapped Java model as a Python RDD.

    The result of the JVM-side ``userFeatures()`` call is passed through
    ``SerDe.fromTuple2RDD`` and ``PythonRDD.javaToPython``, then wrapped
    in an RDD that uses an auto-batched pickle serializer.
    """
    sc = self._context
    # Fetch the features exactly once; an earlier assignment from
    # userFeaturesString() was overwritten before use and is not needed.
    juf = self._java_model.userFeatures()
    # Turn the JVM tuples into arrays before handing them to javaToPython.
    juf = sc._jvm.SerDe.fromTuple2RDD(juf).toJavaRDD()
    return RDD(sc._jvm.PythonRDD.javaToPython(juf), sc,
               AutoBatchedSerializer(PickleSerializer()))

def productFeatures(self):
    """
    Return the product features of the wrapped Java model as a Python RDD.

    The result of the JVM-side ``productFeatures()`` call is passed through
    ``SerDe.fromTuple2RDD`` and ``PythonRDD.javaToPython``, then wrapped
    in an RDD that uses an auto-batched pickle serializer.
    """
    ctx = self._context
    java_features = self._java_model.productFeatures()
    # Turn the JVM tuples into arrays before handing them to javaToPython.
    java_rows = ctx._jvm.SerDe.fromTuple2RDD(java_features).toJavaRDD()
    return RDD(
        ctx._jvm.PythonRDD.javaToPython(java_rows),
        ctx,
        AutoBatchedSerializer(PickleSerializer()),
    )


class ALS(object):

Expand Down

0 comments on commit 2aa1bf8

Please sign in to comment.