From 032cd62cee6b2bd134f6b9017a7e68ef333990a5 Mon Sep 17 00:00:00 2001
From: Davies Liu
Date: Thu, 18 Sep 2014 15:01:13 -0700
Subject: [PATCH] add more type check and conversion for user_product

predictAll() now inspects the first element of user_product before the
RDD is handed to the JVM:

- list elements are converted to tuples;
- the element must be a 2-tuple of (user, product);
- if either field is a str, both fields are converted with int();
- after conversion both fields must be of type int.

---
 python/pyspark/mllib/recommendation.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index f61b9cf9ec872..59c1c5ff0ced0 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -66,6 +66,16 @@ def predict(self, user, product):
 
     def predictAll(self, user_product):
         assert isinstance(user_product, RDD), "user_product should be RDD of (user, product)"
+        first = user_product.first()
+        if isinstance(first, list):
+            user_product = user_product.map(tuple)
+            first = tuple(first)
+        assert type(first) is tuple and len(first) == 2, \
+            "user_product should be RDD of (user, product)"
+        if any(isinstance(x, str) for x in first):
+            user_product = user_product.map(lambda (u, p): (int(u), int(p)))
+            first = tuple(map(int, first))
+        assert all(type(x) is int for x in first), "user and product in user_product should be int"
         sc = self._context
         tuplerdd = sc._jvm.SerDe.asTupleRDD(user_product._to_java_object_rdd().rdd())
         jresult = self._java_model.predict(tuplerdd).toJavaRDD()