diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 8f3e6bd21b752..1787fe929865d 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -157,6 +157,18 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { )) } + test("join repro") { + val rdd1 = sc.parallelize(Array((0,(0,2.0)), (1,(0,2.0)), (2,(0,2.0)), (0,(1,1.5)), (1,(1,2.0)), (2,(1,2.25)), (0,(2,1.3333333333333333)), (1,(2,2.0)), + (2,(2,2.4)), (0,(3,1.25)), (1,(3,2.0)), (2,(3,2.5)), (0,(4,1.2)), (1,(4,2.0)), (2,(4,2.5714285714285716)), + (0,(5,1.1666666666666667)), (1,(5,2.0)), (2,(5,2.625)), (0,(6,1.1428571428571428)), (1,(6,2.0)), (2,(6,2.6666666666666665)), (0,(7,1.125)), + (1,(7,2.0)), (2,(7,2.7)), (0,(8,1.1111111111111112)), (1,(8,2.0)), (2,(8,2.727272727272727)), (0,(9,1.1)), (1,(9,2.0)), (2,(9,2.75)) + )) + val rdd2 = sc.parallelize(Array((0,(0,-0.03387252842009158)), (0,(1,0.789223562443648)), (0,(2,4.541482001513894)), (1,(0,-0.05259151798283574)), (1,(1,0.10552081450086591)), (1,(2,-9.596168335700813)), (2,(0,-0.06636448259750038)), (2,(1,-0.48644238753215385)), (2,(2,5.2866468281774734)) + )) + val joined = rdd1.join(rdd2).collect() + assert(joined.toSet === Set((1,((6,2.0),(0,-0.05259151798283574))), (2,((6,2.6666666666666665),(0,-0.06636448259750038))), (1,((2,2.0),(2,-9.596168335700813))), (1,((1,2.0),(2,-9.596168335700813))), (1,((3,2.0),(2,-9.596168335700813))), (0,((3,1.25),(2,4.541482001513894))), (1,((6,2.0),(2,-9.596168335700813))), (2,((4,2.5714285714285716),(2,5.2866468281774734))), (0,((2,1.3333333333333333),(2,4.541482001513894))), (0,((4,1.2),(0,-0.03387252842009158))), (1,((8,2.0),(0,-0.05259151798283574))), (1,((4,2.0),(1,0.10552081450086591))), (0,((9,1.1),(0,-0.03387252842009158))), (0,((7,1.125),(1,0.789223562443648))), (0,((2,1.3333333333333333),(0,-0.03387252842009158))), (1,((9,2.0),(1,0.10552081450086591))), (2,((8,2.727272727272727),(0,-0.06636448259750038))), (2,((6,2.6666666666666665),(2,5.2866468281774734))), (2,((0,2.0),(0,-0.06636448259750038))), (1,((3,2.0),(0,-0.05259151798283574))), (2,((4,2.5714285714285716),(0,-0.06636448259750038))), (1,((4,2.0),(0,-0.05259151798283574))), (0,((6,1.1428571428571428),(2,4.541482001513894))), (0,((7,1.125),(2,4.541482001513894))), (2,((0,2.0),(2,5.2866468281774734))), (1,((0,2.0),(0,-0.05259151798283574))), (2,((3,2.5),(2,5.2866468281774734))), (1,((7,2.0),(2,-9.596168335700813))), (1,((4,2.0),(2,-9.596168335700813))), (0,((4,1.2),(2,4.541482001513894))), (2,((7,2.7),(0,-0.06636448259750038))), (1,((7,2.0),(1,0.10552081450086591))), (2,((8,2.727272727272727),(1,-0.48644238753215385))), (2,((1,2.25),(2,5.2866468281774734))), (0,((8,1.1111111111111112),(0,-0.03387252842009158))), (0,((3,1.25),(0,-0.03387252842009158))), (1,((3,2.0),(1,0.10552081450086591))), (2,((0,2.0),(1,-0.48644238753215385))), (2,((5,2.625),(0,-0.06636448259750038))), (1,((5,2.0),(0,-0.05259151798283574))), (2,((1,2.25),(0,-0.06636448259750038))), (2,((2,2.4),(1,-0.48644238753215385))), (2,((5,2.625),(1,-0.48644238753215385))), (2,((7,2.7),(2,5.2866468281774734))), (0,((6,1.1428571428571428),(0,-0.03387252842009158))), (0,((8,1.1111111111111112),(2,4.541482001513894))), (0,((1,1.5),(2,4.541482001513894))), (0,((5,1.1666666666666667),(2,4.541482001513894))), (2,((3,2.5),(0,-0.06636448259750038))), (1,((2,2.0),(0,-0.05259151798283574))), (2,((7,2.7),(1,-0.48644238753215385))), (0,((2,1.3333333333333333),(1,0.789223562443648))), (2,((9,2.75),(2,5.2866468281774734))), (0,((5,1.1666666666666667),(1,0.789223562443648))), (0,((0,2.0),(0,-0.03387252842009158))), (1,((8,2.0),(2,-9.596168335700813))), (0,((1,1.5),(0,-0.03387252842009158))), (2,((9,2.75),(1,-0.48644238753215385))), (2,((4,2.5714285714285716),(1,-0.48644238753215385))), (0,((0,2.0),(2,4.541482001513894))), (0,((8,1.1111111111111112),(1,0.789223562443648))), (0,((9,1.1),(2,4.541482001513894))), (0,((0,2.0),(1,0.789223562443648))), (1,((1,2.0),(1,0.10552081450086591))), (2,((6,2.6666666666666665),(1,-0.48644238753215385))), (0,((6,1.1428571428571428),(1,0.789223562443648))), (2,((9,2.75),(0,-0.06636448259750038))), (1,((9,2.0),(2,-9.596168335700813))), (2,((8,2.727272727272727),(2,5.2866468281774734))), (0,((4,1.2),(1,0.789223562443648))), (1,((5,2.0),(1,0.10552081450086591))), (1,((6,2.0),(1,0.10552081450086591))), (1,((5,2.0),(2,-9.596168335700813))), (1,((0,2.0),(2,-9.596168335700813))), (2,((5,2.625),(2,5.2866468281774734))), (1,((7,2.0),(0,-0.05259151798283574))), (0,((5,1.1666666666666667),(0,-0.03387252842009158))), (1,((1,2.0),(0,-0.05259151798283574))), (0,((9,1.1),(1,0.789223562443648))), (1,((9,2.0),(0,-0.05259151798283574))), (0,((7,1.125),(0,-0.03387252842009158))), (0,((3,1.25),(1,0.789223562443648))), (2,((3,2.5),(1,-0.48644238753215385))), (1,((2,2.0),(1,0.10552081450086591))), (1,((8,2.0),(1,0.10552081450086591))), (1,((0,2.0),(1,0.10552081450086591))), (2,((2,2.4),(0,-0.06636448259750038))), (2,((1,2.25),(1,-0.48644238753215385))), (0,((1,1.5),(1,0.789223562443648))), (2,((2,2.4),(2,5.2866468281774734))))) + } + test("join all-to-all") { val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (1, 3))) val rdd2 = sc.parallelize(Array((1, 'x'), (1, 'y')))