From e4c4880cffb7662796d0f126aa75c2810cb602eb Mon Sep 17 00:00:00 2001
From: garborg
Date: Thu, 6 Feb 2014 19:37:28 -0600
Subject: [PATCH] Add crossjoin

---
 src/DataFrames.jl     |  1 +
 src/dataframe/join.jl | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/join.jl          | 19 ++++++++++++++++++-
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/src/DataFrames.jl b/src/DataFrames.jl
index 805d26f638..79749671ff 100644
--- a/src/DataFrames.jl
+++ b/src/DataFrames.jl
@@ -56,6 +56,7 @@ export @~,
        combine,
        complete_cases,
        complete_cases!,
+       crossjoin,
        cut,
        DataFrame,
        DataFrameRow,
diff --git a/src/dataframe/join.jl b/src/dataframe/join.jl
index 90416b1ab1..5d51c3528f 100644
--- a/src/dataframe/join.jl
+++ b/src/dataframe/join.jl
@@ -169,3 +169,51 @@ function Base.join(df1::AbstractDataFrame,
         throw(ArgumentError("Unknown kind of join requested"))
     end
 end
+
+##
+## Crossjoin
+##
+
+let
+    global crossjoin
+
+    # crossjoin(xs...)
+    #
+    # Combine every row of each input with every row of all the others and
+    # return the result as a new DataFrame. Each input is a DataFrame or a
+    # (name, vector) tuple describing a single column. Inputs listed first
+    # vary slowest in the output.
+    function crossjoin(xs::Union(DataFrame, (Symbol, AbstractVector))...)
+        lens = Int[xlen(x) for x in xs]
+        d = DataFrame()
+        # `times` is the product of the row counts of the inputs before
+        # xs[i]; it is passed to `rep` as the tiling count.
+        times = 1
+        for i in 1:length(xs)
+            # `each` is the product of the row counts of the inputs after
+            # xs[i]. It is computed by multiplication, not by dividing a
+            # running total by lens[i], so a zero-row input produces a
+            # zero-row result instead of a DivideError.
+            each = prod(lens[(i + 1):end])
+            addx!(d, xs[i], times, each)
+            times *= lens[i]
+        end
+        d
+    end
+
+    # Number of rows contributed by one input.
+    xlen(x::DataFrame) = size(x, 1)
+    xlen(x::(Symbol, AbstractVector)) = size(x[2], 1)
+
+    # Forward every element obtained by iterating `x` to `addx!`.
+    function addx!(d::DataFrame, x::DataFrame, times::Int, each::Int)
+        for c in x
+            addx!(d, c, times, each)
+        end
+    end
+
+    # Assign `rep(x[2], times, each)` to the column of `d` named `x[1]`.
+    function addx!(d::DataFrame, x::(Symbol, AbstractVector), times::Int, each::Int)
+        d[x[1]] = rep(x[2], times, each)
+    end
+end
diff --git a/test/join.jl b/test/join.jl
index 37745eeab0..84bd006ada 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -32,4 +32,21 @@ module TestJoin
     @test_throws join(df1, df2)
 
     join(df1, df2, on = [:A, :B])
-end
\ No newline at end of file
+
+
+    df = DataFrame(A = 1:2, B = ['b', 'a'])
+    t1 = (:C, [2.4, 3.6])
+    t2 = (:D, [2, 3])
+
+    crossjoin(df, df, t1, t2, df, t2)
+
+    dft = DataFrame(A = [1, 1, 1, 1, 2, 2, 2, 2],
+                    B = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a'],
+                    C = [2.4, 2.4, 3.6, 3.6, 2.4, 2.4, 3.6, 3.6],
+                    D = [2, 3, 2, 3, 2, 3, 2, 3])
+
+    @test crossjoin(df, t1, t2) == dft
+
+    # A zero-row input gives a zero-row result instead of a DivideError
+    @test size(crossjoin(df, (:E, Int[])), 1) == 0
+end