From c61a0ecb985948ba9a104fdf0085fae0f2e48362 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sat, 20 Aug 2016 10:16:52 -0400
Subject: [PATCH 01/11] Parametrize ModelMatrix container type

---
 src/statsmodels/formula.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 052fea86a4..17e94b40ae 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -48,8 +48,8 @@ type ModelFrame
     contrasts::Dict{Symbol, ContrastsMatrix}
 end
 
-type ModelMatrix{T <: @compat(Union{Float32, Float64})}
-    m::Matrix{T}
+type ModelMatrix{T <: @compat(Union{Matrix{Float32}, Matrix{Float64}, SparseMatrixCSC{Float32,Int}, SparseMatrixCSC{Float64,Int}})}
+    m::T
     assign::Vector{Int}
 end
 
@@ -479,7 +479,7 @@ function ModelMatrix(mf::ModelFrame)
         append!(assign, fill(i_term, size(blocks[end], 2)))
     end
 
-    ModelMatrix{Float64}(reduce(hcat, blocks), assign)
+    ModelMatrix{Matrix{Float64}}(reduce(hcat, blocks), assign)
 end
 
 

From d46fc37c775e4cf67f7ae80a5ffcf93157126653 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sat, 20 Aug 2016 16:53:25 -0400
Subject: [PATCH 02/11] Eliminate hardcoded model matrix container type when
 constructing from ModelFrame

---
 src/statsmodels/formula.jl | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 17e94b40ae..072809fd81 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -48,7 +48,11 @@ type ModelFrame
     contrasts::Dict{Symbol, ContrastsMatrix}
 end
 
-type ModelMatrix{T <: @compat(Union{Matrix{Float32}, Matrix{Float64}, SparseMatrixCSC{Float32,Int}, SparseMatrixCSC{Float64,Int}})}
+modelmatrixcontainertypes = [Matrix{Float32}, Matrix{Float64},
+                         SparseMatrixCSC{Float32,Int},
+                         SparseMatrixCSC{Float64,Int}]
+
+type ModelMatrix{T <: Union{modelmatrixcontainertypes...}}
     m::T
     assign::Vector{Int}
 end
@@ -437,21 +441,21 @@ If there is an intercept in the model, that column occurs first and its
 Mixed-effects models include "random-effects" terms which are ignored when
 creating the model matrix.
 """
-function ModelMatrix(mf::ModelFrame)
+function ModelMatrix(T::Union{map(t->Type{t}, modelmatrixcontainertypes)...}, mf::ModelFrame)
     dfrm = mf.df
     terms = droprandomeffects(dropresponse!(mf.terms))
 
-    blocks = Matrix{Float64}[]
+    blocks = T[]
     assign = Int[]
     if terms.intercept
-        push!(blocks, ones(size(dfrm, 1), 1))  # columns of 1's is first block
-        push!(assign, 0)                        # this block corresponds to term zero
+        push!(blocks, convert(T, ones(size(dfrm, 1), 1)))  # columns of 1's is first block
+        push!(assign, 0)                                   # this block corresponds to term zero
     end
 
     factors = terms.factors
 
     ## Map eval. term name + redundancy bool to cached model matrix columns
-    eterm_cols = @compat Dict{Tuple{Symbol,Bool}, Array{Float64}}()
+    eterm_cols = @compat Dict{Tuple{Symbol,Bool}, T}()
     ## Accumulator for each term's vector of eval. term columns.
 
     ## TODO: this method makes multiple copies of the data in the ModelFrame:
@@ -462,7 +466,7 @@ function ModelMatrix(mf::ModelFrame)
     ## "promoted" full-rank versions of categorical columns for non-redundant
     ## eval. terms:
     for (i_term, term) in enumerate(terms.terms)
-        term_cols = Matrix{Float64}[]
+        term_cols = T[]
         ## Pull out the eval terms, and the non-redundancy flags for this term
         ff = Compat.view(factors, :, i_term)
         eterms = Compat.view(terms.eterms, ff)
@@ -479,8 +483,9 @@ function ModelMatrix(mf::ModelFrame)
         append!(assign, fill(i_term, size(blocks[end], 2)))
     end
 
-    ModelMatrix{Matrix{Float64}}(reduce(hcat, blocks), assign)
+    ModelMatrix{T}(reduce(hcat, blocks), assign)
 end
+ModelMatrix(mf::ModelFrame) = ModelMatrix(Matrix{Float64}, mf)
 
 
 """

From 288552c65eb6b0f3f6e7dc4bd8fcf31b0b6c15f7 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 09:58:29 -0400
Subject: [PATCH 03/11] More idiomatic model matrix constructor and relaxed
 container type restrictions

---
 src/statsmodels/formula.jl | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 072809fd81..05775b4d22 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -48,11 +48,9 @@ type ModelFrame
     contrasts::Dict{Symbol, ContrastsMatrix}
 end
 
-modelmatrixcontainertypes = [Matrix{Float32}, Matrix{Float64},
-                         SparseMatrixCSC{Float32,Int},
-                         SparseMatrixCSC{Float64,Int}]
+typealias ModelMatrixContainer{T<:AbstractFloat} AbstractMatrix{T}
 
-type ModelMatrix{T <: Union{modelmatrixcontainertypes...}}
+type ModelMatrix{T <: ModelMatrixContainer}
     m::T
     assign::Vector{Int}
 end
@@ -441,15 +439,15 @@ If there is an intercept in the model, that column occurs first and its
 Mixed-effects models include "random-effects" terms which are ignored when
 creating the model matrix.
 """
-function ModelMatrix(T::Union{map(t->Type{t}, modelmatrixcontainertypes)...}, mf::ModelFrame)
+function (::Type{ModelMatrix{T}}){T<:ModelMatrixContainer}(mf::ModelFrame)
     dfrm = mf.df
     terms = droprandomeffects(dropresponse!(mf.terms))
 
     blocks = T[]
     assign = Int[]
     if terms.intercept
-        push!(blocks, convert(T, ones(size(dfrm, 1), 1)))  # columns of 1's is first block
-        push!(assign, 0)                                   # this block corresponds to term zero
+        push!(blocks, ones(size(dfrm, 1), 1))  # columns of 1's is first block
+        push!(assign, 0)                       # this block corresponds to term zero
     end
 
     factors = terms.factors
@@ -485,7 +483,7 @@ function ModelMatrix(T::Union{map(t->Type{t}, modelmatrixcontainertypes)...}, mf
 
     ModelMatrix{T}(reduce(hcat, blocks), assign)
 end
-ModelMatrix(mf::ModelFrame) = ModelMatrix(Matrix{Float64}, mf)
+ModelMatrix(mf::ModelFrame) = ModelMatrix{Matrix{Float64}}(mf)
 
 
 """

From e1b068ed3588db786360416ed16a00aeea4f49c4 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 11:03:45 -0400
Subject: [PATCH 04/11] Generalize modelmat_cols output typing

---
 src/statsmodels/formula.jl | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 05775b4d22..a6c0df2ca2 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -323,8 +323,8 @@ function setcontrasts!(mf::ModelFrame, new_contrasts::Dict)
 end
 setcontrasts!(mf::ModelFrame; kwargs...) = setcontrasts!(mf, Dict(kwargs))
 
-asmatrix(a::AbstractMatrix) = a
-asmatrix(v::AbstractVector) = reshape(v, (length(v), 1))
+asmatrix(T::Type, a::AbstractMatrix) = convert(T, a)
+asmatrix(T::Type, v::AbstractVector) = convert(T, reshape(v, (length(v), 1)))
 
 """
     StatsBase.model_response(mf::ModelFrame)
@@ -339,33 +339,35 @@ function StatsBase.model_response(mf::ModelFrame)
     end
 end
 
-modelmat_cols(v::DataVector) = asmatrix(convert(Vector{Float64}, v.data))
-modelmat_cols(v::Vector) = asmatrix(convert(Vector{Float64}, v))
+modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, v::DataVector) = asmatrix(T, convert(Vector{Float64}, v.data))
+modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, v::Vector) = asmatrix(T, convert(Vector{Float64}, v))
+
 ## construct model matrix columns from model frame + name (checks for contrasts)
-function modelmat_cols(name::Symbol, mf::ModelFrame; non_redundant::Bool = false)
+function modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, name::Symbol, mf::ModelFrame; non_redundant::Bool = false)
     if haskey(mf.contrasts, name)
-        modelmat_cols(mf.df[name],
+        modelmat_cols(T, mf.df[name],
                       non_redundant ?
                       ContrastsMatrix{FullDummyCoding}(mf.contrasts[name]) :
                       mf.contrasts[name])
     else
-        modelmat_cols(mf.df[name])
+        modelmat_cols(T, mf.df[name])
     end
 end
 
 """
-    modelmat_cols(v::PooledDataVector, contrast::ContrastsMatrix)
+    modelmat_cols(T::Type{ModelMatrixContainer}, v::PooledDataVector, contrast::ContrastsMatrix)
 
-Construct `ModelMatrix` columns based on specified contrasts, ensuring that
+Construct `ModelMatrix` columns of type `T` based on specified contrasts, ensuring that
 levels align properly.
 """
-function modelmat_cols(v::PooledDataVector, contrast::ContrastsMatrix)
+function modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix)
     ## make sure the levels of the contrast matrix and the categorical data
     ## are the same by constructing a re-indexing vector. Indexing into
     ## reindex with v.refs will give the corresponding row number of the
     ## contrast matrix
     reindex = [findfirst(contrast.levels, l) for l in levels(v)]
-    return contrast.matrix[reindex[v.refs], :]
+    contrastmatrix = convert(T, contrast.matrix)
+    return contrastmatrix[reindex[v.refs], :]
 end
 
 """
@@ -374,7 +376,7 @@ Create pairwise products of columns from a vector of matrices
 """
 function expandcols(trm::Vector)
     if length(trm) == 1
-        asmatrix(convert(Array{Float64}, trm[1]))
+        asmatrix(Matrix{Float64}, convert(Array{Float64}, trm[1]))
     else
         a = convert(Array{Float64}, trm[1])
         b = expandcols(trm[2 : end])
@@ -439,7 +441,8 @@ If there is an intercept in the model, that column occurs first and its
 Mixed-effects models include "random-effects" terms which are ignored when
 creating the model matrix.
 """
-function (::Type{ModelMatrix{T}}){T<:ModelMatrixContainer}(mf::ModelFrame)
+@compat function (::Type{ModelMatrix{T}}){T<:ModelMatrixContainer}(mf::ModelFrame)
+    sparsemm = T <: AbstractSparseMatrix
     dfrm = mf.df
     terms = droprandomeffects(dropresponse!(mf.terms))
 
@@ -473,7 +476,7 @@ function (::Type{ModelMatrix{T}}){T<:ModelMatrixContainer}(mf::ModelFrame)
         ## and storing as necessary)
         for (et, nr) in zip(eterms, non_redundants)
             if ! haskey(eterm_cols, (et, nr))
-                eterm_cols[(et, nr)] = modelmat_cols(et, mf, non_redundant=nr)
+                eterm_cols[(et, nr)] = modelmat_cols(T, et, mf, non_redundant=nr)
             end
             push!(term_cols, eterm_cols[(et, nr)])
         end

From 4a9a65f0c5bded221ae06dac88759fcc1ebb68a3 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 11:25:37 -0400
Subject: [PATCH 05/11] Generalize expandcols output types

---
 src/statsmodels/formula.jl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index a6c0df2ca2..628e15017e 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -374,12 +374,11 @@ end
     expandcols(trm::Vector)
 Create pairwise products of columns from a vector of matrices
 """
-function expandcols(trm::Vector)
+function expandcols{T<:ModelMatrixContainer}(trm::Vector{T})
     if length(trm) == 1
-        asmatrix(Matrix{Float64}, convert(Array{Float64}, trm[1]))
+        trm[1]
     else
-        a = convert(Array{Float64}, trm[1])
-        b = expandcols(trm[2 : end])
+        a, b = trm[1], expandcols(trm[2 : end])
         reduce(hcat, [broadcast(*, a, Compat.view(b, :, j)) for j in 1 : size(b, 2)])
     end
 end

From fd12a5d272cd8d4ff325592fb68b8bca1a00fde7 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 12:05:46 -0400
Subject: [PATCH 06/11] Added sparse ModelMatrix creation tests

---
 test/formula.jl | 47 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/test/formula.jl b/test/formula.jl
index b09dd715d9..62864100d7 100644
--- a/test/formula.jl
+++ b/test/formula.jl
@@ -107,6 +107,8 @@ module TestFormula
 
     ## Tests for constructing ModelFrame and ModelMatrix
 
+    sparsetype = SparseMatrixCSC{Float64,Int}
+
     d = DataFrame()
     d[:y] = [1:4;]
     d[:x1] = [5:8;]
@@ -127,6 +129,10 @@ module TestFormula
     @test mm.m[:,1] == ones(4)
     @test mm.m[:,2:3] == [x1 x2]
 
+    smm = ModelMatrix{sparsetype}(mf)
+    @test issparse(smm.m)
+    @test mm.m == smm.m
+
     #test_group("expanding a PooledVec into a design matrix of indicators for each dummy variable")
 
     d[:x1p] = PooledDataArray(d[:x1])
@@ -138,6 +144,10 @@ module TestFormula
     @test mm.m[:,4] == [0, 0, 0, 1.]
     @test coefnames(mf)[2:end] == ["x1p: 6", "x1p: 7", "x1p: 8"]
 
+    smm = ModelMatrix{sparsetype}(mf)
+    @test issparse(smm.m)
+    @test mm.m == smm.m
+
     #test_group("create a design matrix from interactions from two DataFrames")
     ## this was removed in commit dead4562506badd7e84a2367086f5753fa49bb6a
 
@@ -199,11 +209,13 @@ module TestFormula
     mf = ModelFrame(f, df)
     mm = ModelMatrix(mf)
     @test mm.m == [ones(4) x1.*x2]
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     f = y ~ x1 * x2
     mf = ModelFrame(f, df)
     mm = ModelMatrix(mf)
     @test mm.m == [ones(4) x1 x2 x1.*x2]
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     df[:x1] = PooledDataArray(x1)
     x1e = [[0, 1, 0, 0] [0, 0, 1, 0] [0, 0, 0, 1]]
@@ -211,6 +223,7 @@ module TestFormula
     mf = ModelFrame(f, df)
     mm = ModelMatrix(mf)
     @test mm.m == [ones(4) x1e x2 [0, 10, 0, 0] [0, 0, 11, 0] [0, 0, 0, 12]]
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     #test_group("Basic transformations")
 
@@ -261,6 +274,7 @@ module TestFormula
     mf = ModelFrame(y ~ x2, d)
     mm = ModelMatrix(mf)
     @test mm.m == [ones(4) x2]
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
     ## @test model_response(mf) == y''     # fails: Int64 vs. Float64
 
     df = deepcopy(d)
@@ -294,11 +308,13 @@ module TestFormula
     mf = ModelFrame(f, df)
     mm = ModelMatrix(mf)
     @test mm.m == [ones(4) x2.*x3.*x4]
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     f = y ~ x1 & x2 & x3
     mf = ModelFrame(f, df)
     mm = ModelMatrix(mf)
     @test mm.m[:, 2:end] == diagm(x2.*x3)
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     #test_group("Column groups in formulas")
     ## set_group was removed in The Great Purge (55e47cd)
@@ -346,6 +362,7 @@ module TestFormula
     mf = ModelFrame(f, df)
     mm = ModelMatrix(mf)
     @test mm.m == hcat(ones(4), x1.*x3, x1.*x4, x2.*x3, x2.*x4)
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     ## Condensing nested :+ calls
     f = y ~ x1 + (x2 + (x3 + x4))
@@ -368,6 +385,7 @@ module TestFormula
     mf = ModelFrame(y ~ x1m, d)
     mm = ModelMatrix(mf)
     @test mm.m[:, 2] == d[complete_cases(d), :x1m]
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     ## Same variable on left and right side
     mf = ModelFrame(x1 ~ x1, df)
@@ -386,7 +404,8 @@ d[:n] = 1.:8
 
 ## No intercept
 mf = ModelFrame(n ~ 0 + x, d, contrasts=cs)
-@test ModelMatrix(mf).m == [1 0
+mm = ModelMatrix(mf)
+@test mm.m == [1 0
                             0 1
                             1 0
                             0 1
@@ -394,11 +413,13 @@ mf = ModelFrame(n ~ 0 + x, d, contrasts=cs)
                             0 1
                             1 0
                             0 1]
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a", "x: b"]
 
 ## No first-order term for interaction
 mf = ModelFrame(n ~ 1 + x + x&y, d, contrasts=cs)
-@test ModelMatrix(mf).m[:, 2:end] == [-1 -1  0
+mm = ModelMatrix(mf)
+@test mm.m[:, 2:end] == [-1 -1  0
                                        1  0 -1
                                       -1  1  0
                                        1  0  1
@@ -406,11 +427,13 @@ mf = ModelFrame(n ~ 1 + x + x&y, d, contrasts=cs)
                                        1  0 -1
                                       -1  1  0
                                        1  0  1]
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"]
 
 ## When both terms of interaction are non-redundant:
 mf = ModelFrame(n ~ 0 + x&y, d, contrasts=cs)
-@test ModelMatrix(mf).m == [1 0 0 0
+mm = ModelMatrix(mf)
+@test mm.m == [1 0 0 0
                             0 1 0 0
                             0 0 1 0
                             0 0 0 1
@@ -418,19 +441,23 @@ mf = ModelFrame(n ~ 0 + x&y, d, contrasts=cs)
                             0 1 0 0
                             0 0 1 0
                             0 0 0 1]
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",                             
                         "x: a & y: d", "x: b & y: d"]
 
 # only a three-way interaction: every term is promoted.
 mf = ModelFrame(n ~ 0 + x&y&z, d, contrasts=cs)
-@test ModelMatrix(mf).m == eye(8)
+mm = ModelMatrix(mf)
+@test mm.m == eye(8)
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 
 # two two-way interactions, with no lower-order term. both are promoted in
 # first (both x and y), but only the old term (x) in the second (because
 # dropping x gives z which isn't found elsewhere, but dropping z gives x
 # which is found (implicitly) in the promoted interaction x&y).
 mf = ModelFrame(n ~ 0 + x&y + x&z, d, contrasts=cs)
-@test ModelMatrix(mf).m == [1 0 0 0 -1  0
+mm = ModelMatrix(mf)
+@test mm.m == [1 0 0 0 -1  0
                             0 1 0 0  0 -1
                             0 0 1 0 -1  0
                             0 0 0 1  0 -1
@@ -438,6 +465,7 @@ mf = ModelFrame(n ~ 0 + x&y + x&z, d, contrasts=cs)
                             0 1 0 0  0  1
                             0 0 1 0  1  0
                             0 0 0 1  0  1]
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
                         "x: a & y: d", "x: b & y: d",
                         "x: a & z: f", "x: b & z: f"]
@@ -446,7 +474,8 @@ mf = ModelFrame(n ~ 0 + x&y + x&z, d, contrasts=cs)
 # this is because dropping x gives y&z which isn't present, but dropping y or z
 # gives x&z or x&z respectively, which are both present.
 mf = ModelFrame(n ~ 0 + x&y + x&z + x&y&z, d, contrasts=cs)
-@test ModelMatrix(mf).m == [1 0 0 0 -1  0  1  0
+mm = ModelMatrix(mf)
+@test mm.m == [1 0 0 0 -1  0  1  0
                             0 1 0 0  0 -1  0  1
                             0 0 1 0 -1  0 -1  0
                             0 0 0 1  0 -1  0 -1
@@ -454,6 +483,7 @@ mf = ModelFrame(n ~ 0 + x&y + x&z + x&y&z, d, contrasts=cs)
                             0 1 0 0  0  1  0 -1
                             0 0 1 0  1  0  1  0
                             0 0 0 1  0  1  0  1]
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
                         "x: a & y: d", "x: b & y: d",
                         "x: a & z: f", "x: b & z: f",
@@ -463,7 +493,8 @@ mf = ModelFrame(n ~ 0 + x&y + x&z + x&y&z, d, contrasts=cs)
 # promoted in both (along with lower-order term), because in every case, when
 # x is dropped, the remaining terms (1, y, and z) aren't present elsewhere.
 mf = ModelFrame(n ~ 0 + x + x&y + x&z, d, contrasts=cs)
-@test ModelMatrix(mf).m == [1 0 -1  0 -1  0
+mm = ModelMatrix(mf)
+@test mm.m == [1 0 -1  0 -1  0
                             0 1  0 -1  0 -1
                             1 0  1  0 -1  0
                             0 1  0  1  0 -1
@@ -471,12 +502,12 @@ mf = ModelFrame(n ~ 0 + x + x&y + x&z, d, contrasts=cs)
                             0 1  0 -1  0  1
                             1 0  1  0  1  0
                             0 1  0  1  0  1]
+@test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a", "x: b",
                         "x: a & y: d", "x: b & y: d",
                         "x: a & z: f", "x: b & z: f"]
 
 
-
 ## FAILS: When both terms are non-redundant and intercept is PRESENT
 ## (not fully redundant). Ideally, would drop last column. Might make sense
 ## to warn about this, and suggest recoding x and y into a single variable.

From ff6f7062fa4a3858662ab74566fa9cec3572b3de Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 15:21:10 -0400
Subject: [PATCH 07/11] More explicit model matrix constructor type output
 testing

---
 test/formula.jl | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/test/formula.jl b/test/formula.jl
index 62864100d7..401d17b9ee 100644
--- a/test/formula.jl
+++ b/test/formula.jl
@@ -126,13 +126,15 @@ module TestFormula
     @test coefnames(mf) == ["(Intercept)","x1","x2"]
     ## @test model_response(mf) == transpose([1. 2 3 4]) # fails: Int64 vs. Float64
     mm = ModelMatrix(mf)
+    smm = ModelMatrix{sparsetype}(mf)
     @test mm.m[:,1] == ones(4)
     @test mm.m[:,2:3] == [x1 x2]
-
-    smm = ModelMatrix{sparsetype}(mf)
-    @test issparse(smm.m)
     @test mm.m == smm.m
 
+    @test isa(mm.m, Matrix{Float64})
+    @test isa(smm.m, sparsetype)
+    @test isa(ModelMatrix{DataMatrix{Float64}}(mf).m, DataMatrix{Float64})
+
     #test_group("expanding a PooledVec into a design matrix of indicators for each dummy variable")
 
     d[:x1p] = PooledDataArray(d[:x1])
@@ -143,10 +145,7 @@ module TestFormula
     @test mm.m[:,3] == [0, 0, 1., 0]
     @test mm.m[:,4] == [0, 0, 0, 1.]
     @test coefnames(mf)[2:end] == ["x1p: 6", "x1p: 7", "x1p: 8"]
-
-    smm = ModelMatrix{sparsetype}(mf)
-    @test issparse(smm.m)
-    @test mm.m == smm.m
+    @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     #test_group("create a design matrix from interactions from two DataFrames")
     ## this was removed in commit dead4562506badd7e84a2367086f5753fa49bb6a

From f94dd836d1aa3c18f9b70e486f027cd92ac70033 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 15:48:25 -0400
Subject: [PATCH 08/11] Rename ModelMatrixContainer and remove unneeded
 variables/methods

---
 src/statsmodels/formula.jl | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 628e15017e..b6de297079 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -48,9 +48,9 @@ type ModelFrame
     contrasts::Dict{Symbol, ContrastsMatrix}
 end
 
-typealias ModelMatrixContainer{T<:AbstractFloat} AbstractMatrix{T}
+typealias AbstractFloatMatrix{T<:AbstractFloat} AbstractMatrix{T}
 
-type ModelMatrix{T <: ModelMatrixContainer}
+type ModelMatrix{T <: AbstractFloatMatrix}
     m::T
     assign::Vector{Int}
 end
@@ -323,9 +323,6 @@ function setcontrasts!(mf::ModelFrame, new_contrasts::Dict)
 end
 setcontrasts!(mf::ModelFrame; kwargs...) = setcontrasts!(mf, Dict(kwargs))
 
-asmatrix(T::Type, a::AbstractMatrix) = convert(T, a)
-asmatrix(T::Type, v::AbstractVector) = convert(T, reshape(v, (length(v), 1)))
-
 """
     StatsBase.model_response(mf::ModelFrame)
 Extract the response column, if present.  `DataVector` or
@@ -339,11 +336,8 @@ function StatsBase.model_response(mf::ModelFrame)
     end
 end
 
-modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, v::DataVector) = asmatrix(T, convert(Vector{Float64}, v.data))
-modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, v::Vector) = asmatrix(T, convert(Vector{Float64}, v))
-
 ## construct model matrix columns from model frame + name (checks for contrasts)
-function modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, name::Symbol, mf::ModelFrame; non_redundant::Bool = false)
+function modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, name::Symbol, mf::ModelFrame; non_redundant::Bool = false)
     if haskey(mf.contrasts, name)
         modelmat_cols(T, mf.df[name],
                       non_redundant ?
@@ -354,13 +348,16 @@ function modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, name::Symbol, mf::Mod
     end
 end
 
+modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::DataVector) = convert(T, reshape(v.data, length(v), 1))
+modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::Vector) = convert(T, reshape(v, length(v), 1))
+
 """
-    modelmat_cols(T::Type{ModelMatrixContainer}, v::PooledDataVector, contrast::ContrastsMatrix)
+    modelmat_cols(T::Type{AbstractFloatMatrix}, v::PooledDataVector, contrast::ContrastsMatrix)
 
 Construct `ModelMatrix` columns of type `T` based on specified contrasts, ensuring that
 levels align properly.
 """
-function modelmat_cols{T<:ModelMatrixContainer}(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix)
+function modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix)
     ## make sure the levels of the contrast matrix and the categorical data
     ## are the same by constructing a re-indexing vector. Indexing into
     ## reindex with v.refs will give the corresponding row number of the
@@ -374,7 +371,7 @@ end
     expandcols(trm::Vector)
 Create pairwise products of columns from a vector of matrices
 """
-function expandcols{T<:ModelMatrixContainer}(trm::Vector{T})
+function expandcols{T<:AbstractFloatMatrix}(trm::Vector{T})
     if length(trm) == 1
         trm[1]
     else
@@ -440,8 +437,7 @@ If there is an intercept in the model, that column occurs first and its
 Mixed-effects models include "random-effects" terms which are ignored when
 creating the model matrix.
 """
-@compat function (::Type{ModelMatrix{T}}){T<:ModelMatrixContainer}(mf::ModelFrame)
-    sparsemm = T <: AbstractSparseMatrix
+@compat function (::Type{ModelMatrix{T}}){T<:AbstractFloatMatrix}(mf::ModelFrame)
     dfrm = mf.df
     terms = droprandomeffects(dropresponse!(mf.terms))
 

From dd0ae91a69dd3e012b8e932811e3be7810849788 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Sun, 21 Aug 2016 20:40:31 -0400
Subject: [PATCH 09/11] Split value assignment onto two lines

---
 src/statsmodels/formula.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index b6de297079..413faaf4c4 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -375,7 +375,8 @@ function expandcols{T<:AbstractFloatMatrix}(trm::Vector{T})
     if length(trm) == 1
         trm[1]
     else
-        a, b = trm[1], expandcols(trm[2 : end])
+        a = trm[1]
+        b = expandcols(trm[2 : end])
         reduce(hcat, [broadcast(*, a, Compat.view(b, :, j)) for j in 1 : size(b, 2)])
     end
 end

From db5831862e86fc0cf445b3684b669dd276b9b4d0 Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Mon, 22 Aug 2016 19:45:48 -0400
Subject: [PATCH 10/11] Fix test result spacing and incorrect method signature
 documentation

---
 src/statsmodels/formula.jl |  2 +-
 test/formula.jl            | 84 +++++++++++++++++++-------------------
 2 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 413faaf4c4..2f13d7019c 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -352,7 +352,7 @@ modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::DataVector) = convert(T, res
 modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::Vector) = convert(T, reshape(v, length(v), 1))
 
 """
-    modelmat_cols(T::Type{AbstractFloatMatrix}, v::PooledDataVector, contrast::ContrastsMatrix)
+    modelmat_cols(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix)
 
 Construct `ModelMatrix` columns of type `T` based on specified contrasts, ensuring that
 levels align properly.
diff --git a/test/formula.jl b/test/formula.jl
index 401d17b9ee..5ae777e632 100644
--- a/test/formula.jl
+++ b/test/formula.jl
@@ -405,13 +405,13 @@ d[:n] = 1.:8
 mf = ModelFrame(n ~ 0 + x, d, contrasts=cs)
 mm = ModelMatrix(mf)
 @test mm.m == [1 0
-                            0 1
-                            1 0
-                            0 1
-                            1 0
-                            0 1
-                            1 0
-                            0 1]
+               0 1
+               1 0
+               0 1
+               1 0
+               0 1
+               1 0
+               0 1]
 @test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a", "x: b"]
 
@@ -419,13 +419,13 @@ mm = ModelMatrix(mf)
 mf = ModelFrame(n ~ 1 + x + x&y, d, contrasts=cs)
 mm = ModelMatrix(mf)
 @test mm.m[:, 2:end] == [-1 -1  0
-                                       1  0 -1
-                                      -1  1  0
-                                       1  0  1
-                                      -1 -1  0
-                                       1  0 -1
-                                      -1  1  0
-                                       1  0  1]
+                         1  0 -1
+                         -1  1  0
+                         1  0  1
+                         -1 -1  0
+                         1  0 -1
+                         -1  1  0
+                         1  0  1]
 @test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"]
 
@@ -433,13 +433,13 @@ mm = ModelMatrix(mf)
 mf = ModelFrame(n ~ 0 + x&y, d, contrasts=cs)
 mm = ModelMatrix(mf)
 @test mm.m == [1 0 0 0
-                            0 1 0 0
-                            0 0 1 0
-                            0 0 0 1
-                            1 0 0 0
-                            0 1 0 0
-                            0 0 1 0
-                            0 0 0 1]
+               0 1 0 0
+               0 0 1 0
+               0 0 0 1
+               1 0 0 0
+               0 1 0 0
+               0 0 1 0
+               0 0 0 1]
 @test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",                             
                         "x: a & y: d", "x: b & y: d"]
@@ -457,13 +457,13 @@ mm = ModelMatrix(mf)
 mf = ModelFrame(n ~ 0 + x&y + x&z, d, contrasts=cs)
 mm = ModelMatrix(mf)
 @test mm.m == [1 0 0 0 -1  0
-                            0 1 0 0  0 -1
-                            0 0 1 0 -1  0
-                            0 0 0 1  0 -1
-                            1 0 0 0  1  0
-                            0 1 0 0  0  1
-                            0 0 1 0  1  0
-                            0 0 0 1  0  1]
+               0 1 0 0  0 -1
+               0 0 1 0 -1  0
+               0 0 0 1  0 -1
+               1 0 0 0  1  0
+               0 1 0 0  0  1
+               0 0 1 0  1  0
+               0 0 0 1  0  1]
 @test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
                         "x: a & y: d", "x: b & y: d",
@@ -475,13 +475,13 @@ mm = ModelMatrix(mf)
 mf = ModelFrame(n ~ 0 + x&y + x&z + x&y&z, d, contrasts=cs)
 mm = ModelMatrix(mf)
 @test mm.m == [1 0 0 0 -1  0  1  0
-                            0 1 0 0  0 -1  0  1
-                            0 0 1 0 -1  0 -1  0
-                            0 0 0 1  0 -1  0 -1
-                            1 0 0 0  1  0 -1  0
-                            0 1 0 0  0  1  0 -1
-                            0 0 1 0  1  0  1  0
-                            0 0 0 1  0  1  0  1]
+               0 1 0 0  0 -1  0  1
+               0 0 1 0 -1  0 -1  0
+               0 0 0 1  0 -1  0 -1
+               1 0 0 0  1  0 -1  0
+               0 1 0 0  0  1  0 -1
+               0 0 1 0  1  0  1  0
+               0 0 0 1  0  1  0  1]
 @test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
                         "x: a & y: d", "x: b & y: d",
@@ -494,13 +494,13 @@ mm = ModelMatrix(mf)
 mf = ModelFrame(n ~ 0 + x + x&y + x&z, d, contrasts=cs)
 mm = ModelMatrix(mf)
 @test mm.m == [1 0 -1  0 -1  0
-                            0 1  0 -1  0 -1
-                            1 0  1  0 -1  0
-                            0 1  0  1  0 -1
-                            1 0 -1  0  1  0
-                            0 1  0 -1  0  1
-                            1 0  1  0  1  0
-                            0 1  0  1  0  1]
+               0 1  0 -1  0 -1
+               1 0  1  0 -1  0
+               0 1  0  1  0 -1
+               1 0 -1  0  1  0
+               0 1  0 -1  0  1
+               1 0  1  0  1  0
+               0 1  0  1  0  1]
 @test mm.m == ModelMatrix{sparsetype}(mf).m
 @test coefnames(mf) == ["x: a", "x: b",
                         "x: a & y: d", "x: b & y: d",

From 25935c05358cfd9f50fdc302712c73b86dd35eec Mon Sep 17 00:00:00 2001
From: Gord Stephen <gord@gordstephen.ca>
Date: Fri, 26 Aug 2016 17:17:35 -0400
Subject: [PATCH 11/11] Docstring updates

---
 src/statsmodels/formula.jl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl
index 2f13d7019c..165fce802d 100644
--- a/src/statsmodels/formula.jl
+++ b/src/statsmodels/formula.jl
@@ -352,7 +352,7 @@ modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::DataVector) = convert(T, res
 modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::Vector) = convert(T, reshape(v, length(v), 1))
 
 """
-    modelmat_cols(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix)
+    modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix)
 
 Construct `ModelMatrix` columns of type `T` based on specified contrasts, ensuring that
 levels align properly.
@@ -368,7 +368,7 @@ function modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::PooledDataVector, c
 end
 
 """
-    expandcols(trm::Vector)
+    expandcols{T<:AbstractFloatMatrix}(trm::Vector{T})
 Create pairwise products of columns from a vector of matrices
 """
 function expandcols{T<:AbstractFloatMatrix}(trm::Vector{T})
@@ -423,8 +423,9 @@ end
 
 
 """
-    ModelMatrix(mf::ModelFrame)
-Create a `ModelMatrix` from the `terms` and `df` members of `mf`
+    ModelMatrix{T<:AbstractFloatMatrix}(mf::ModelFrame)
+Create a `ModelMatrix{T}` whose matrix field `m` has type `T` (default
+`Matrix{Float64}`) from the `terms` and `df` members of `mf`.
 
 This is basically a map-reduce where terms are mapped to columns by `cols`
 and reduced by `hcat`.  During the collection of the columns the `assign`