Eliminate l,u,p = lu(A) and friends in favor of using factorizations
The compatibility function "factors" is introduced
timholy committed Sep 5, 2012
1 parent cbaad36 commit 826c42f
Showing 7 changed files with 149 additions and 147 deletions.
17 changes: 10 additions & 7 deletions base/export.jl
@@ -90,13 +90,12 @@ export
Zip,
Stat,
Factorization,
Cholesky,
LU,
CholeskyDense,
LUDense,
LUTridiagonal,
LDLT,
LDLTTridiagonal,
QR,
QRP,
QRDense,
QRPDense,

# Exceptions
ArgumentError,
@@ -623,22 +622,26 @@ export
eig,
expm,
eye,
factors,
ishermitian,
issym,
issym_rnd,
istril,
istriu,
kron,
ldlt,
linreg,
lu,
lu!,
matmul2x2,
matmul3x3,
norm,
qr,
qrp,
randsym,
rank,
rref,
sdd,
solve,
svd,
svdvals,
@@ -647,8 +650,8 @@
trideig,
tril,
triu,
qrp,
sdd,
tril!,
triu!,

# dequeues
append!,
166 changes: 102 additions & 64 deletions base/factorizations.jl
@@ -112,50 +112,95 @@ end

abstract Factorization{T}

type Cholesky{T} <: Factorization{T}
type CholeskyDense{T} <: Factorization{T}
LR::Matrix{T}
uplo::LapackChar
function Cholesky(A::Matrix{T}, ul::LapackChar)
if ul != 'U' && ul != 'L' error("Cholesky: uplo must be 'U' or 'L'") end
Acopy = copy(A)
_jl_lapack_potrf(ul, Acopy) == 0 ? new(ul == 'U' ? triu(Acopy) : tril(Acopy), ul) : error("Cholesky: Matrix is not positive-definite")
end

# chol() does not check that input matrix is symmetric/hermitian
# It simply uses upper triangular half
function chol{T<:LapackScalar}(A::Matrix{T}, ul::LapackChar)
if ul != 'U' && ul != 'L'; error("Cholesky: uplo must be 'U' or 'L'"); end
C = CholeskyDense{T}(Array(T, size(A)), ul)
chol(C, A)
end
chol{T<:Real}(A::Matrix{T}, ul::LapackChar) = chol(float64(A), ul)
chol(A) = chol(A, 'U')

function _chol{T<:LapackScalar}(C::CholeskyDense{T})
if _jl_lapack_potrf(C.uplo, C.LR) != 0
error("Cholesky: Matrix is not positive-definite")
end
if C.uplo == 'U'
triu!(C.LR)
else
tril!(C.LR)
end
C
end
function chol{T}(C::CholeskyDense{T}, A::Matrix{T})
copy_to(C.LR, A)
_chol(C)
end
function chol!{T<:LapackScalar}(A::Matrix{T}, ul::LapackChar)
C = CholeskyDense{T}(A, ul)
_chol(C)
end

factors(C::CholeskyDense) = C.LR

Cholesky(A::Matrix) = Cholesky{eltype(A)}(A, 'U')

(\){T<:Union(Float64,Float32,Complex128,Complex64)}(C::Cholesky{T}, B::StridedVecOrMat{T}) =
(\){T<:LapackScalar}(C::CholeskyDense{T}, B::StridedVecOrMat{T}) =
_jl_lapack_potrs(C.uplo, C.LR, copy(B))

inv(C::Cholesky) = _jl_lapack_potri(C.uplo, copy(C.LR)) # should symmetrize the result
inv{T<:LapackScalar}(C::CholeskyDense{T}) = _jl_lapack_potri(C.uplo, copy(C.LR)) # should symmetrize the result

type LU{T} <: Factorization{T}
type LUDense{T} <: Factorization{T}
lu::Matrix{T}
ipiv::Vector{Int32}
function LU(lu::Matrix{T}, ipiv::Vector{Int32})
function LUDense(lu::Matrix{T}, ipiv::Vector{Int32})
m, n = size(lu)
m == numel(ipiv) ? new(lu, ipiv) : error("LU: dimension mismatch")
m == numel(ipiv) ? new(lu, ipiv) : error("LUDense: dimension mismatch")
end
end
size(A::LUDense) = size(A.lu)
size(A::LUDense,n) = size(A.lu,n)

function LU{T<:Union(Float64,Float32,Complex64,Complex128)}(A::Matrix{T})
lu, ipiv = _jl_lapack_getrf(copy(A))
LU{T}(lu, ipiv)
lu{T<:LapackScalar}(A::Matrix{T}) = lu!(copy(A))
function lu!{T<:LapackScalar}(A::Matrix{T})
lu, ipiv = _jl_lapack_getrf(A)
LUDense{T}(lu, ipiv)
end

LU{T<:Number}(A::Matrix{T}) = LU(float64(A))
lu{T<:Real}(A::Matrix{T}) = lu(float64(A))

function det(lu::LU)
function det(lu::LUDense)
m, n = size(lu.lu)
if m != n error("det only defined for square matrices") end
prod(diag(lu.lu)) * (bool(sum(lu.ipiv .!= 1:n) % 2) ? -1 : 1)
end

det(A::Matrix) = det(LU(A))
det(A::Matrix) = det(lu(A))

(\){T<:Union(Float64,Float32,Complex128,Complex64)}(lu::LU{T}, B::StridedVecOrMat{T}) =
(\){T<:LapackScalar}(lu::LUDense{T}, B::StridedVecOrMat{T}) =
_jl_lapack_getrs('N', lu.lu, lu.ipiv, copy(B))
inv(lu::LU) = _jl_lapack_getri(copy(lu.lu), lu.ipiv)

inv{T<:LapackScalar}(lu::LUDense{T}) = _jl_lapack_getri(copy(lu.lu), lu.ipiv)

function factors{T<:LapackScalar}(lu::LUDense{T})
LU, ipiv = lu.lu, lu.ipiv
m, n = size(LU)

L = m >= n ? tril(LU, -1) + eye(m,n) : tril(LU, -1)[:, 1:m] + eye(m,m)
U = m <= n ? triu(LU) : triu(LU)[1:n, :]
P = [1:m]
for i=1:min(m,n)
t = P[i]
P[i] = P[ipiv[i]]
P[ipiv[i]] = t
end
L, U, P
end

## Multiplication by Q or Q' from a QR factorization
for (orm2r, elty) in
@@ -193,82 +238,80 @@ for (orm2r, elty) in
end
end

type QR{T} <: Factorization{T}
abstract QRFactorization{T} <: Factorization{T}

## QR decomposition without column pivots
type QRDense{T} <: QRFactorization{T}
hh::Matrix{T} # Householder transformations and R
tau::Vector{T} # Scalar factors of transformations
function QR(hh::Matrix{T}, tt::Vector{T})
function QRDense(hh::Matrix{T}, tt::Vector{T})
numel(tt) == min(size(hh)) ? new(hh, tt) : error("QR: mismatched dimensions")
end
end
size(A::QRFactorization) = size(A.hh)
size(A::QRFactorization,n) = size(A.hh,n)

function QR(A::Matrix)
function qr{T<:LapackScalar}(A::StridedMatrix{T})
hh, tt = _jl_lapack_geqrf(copy(A))
QR{typeof(A[1])}(hh, tt)
QRDense{eltype(A)}(hh, tt)
end

size{T<:Union(Float64,Float32,Complex128,Complex64)}(A::QR{T}) = size(A.hh)
size{T<:Union(Float64,Float32,Complex128,Complex64)}(A::QR{T},n) = size(A.hh,n)
qr{T<:Real}(x::StridedMatrix{T}) = qr(float64(x))

function factors{T<:LapackScalar}(qrd::QRDense{T})
aa, tau = qrd.hh, qrd.tau
R = triu(aa[1:min(size(qrd)),:])
_jl_lapack_orgqr(aa, tau), R
end

## Multiplication by Q from the QR decomposition
function (*){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QR{T},
B::StridedVecOrMat{T})
(*){T<:LapackScalar}(A::QRFactorization{T}, B::StridedVecOrMat{T}) =
_jl_lapack_orm2r('L', 'N', A.hh, size(A, 2), A.tau, copy(B))
end

## Multiplication by Q' from the QR decomposition
function Ac_mul_B{T<:Union(Float64,Float32,Complex128,Complex64)}(A::QR{T},
B::StridedVecOrMat{T})
Ac_mul_B{T<:LapackScalar}(A::QRFactorization{T}, B::StridedVecOrMat{T}) =
_jl_lapack_orm2r('L', iscomplex(A.tau) ? 'C' : 'T', A.hh, size(A, 2), A.tau, copy(B))
end

## Least squares solution. Should be more careful about cases with m < n
function (\){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QR{T},
B::StridedVecOrMat{T})
function (\){T<:LapackScalar}(A::QRDense{T}, B::StridedVecOrMat{T})
n = numel(A.tau)
qtb = isa(B, Vector) ? (A' * B)[1:n] : (A' * B)[1:n, :]
## Not sure if this avoids copying A.hh[1:n,:] but at least it is not all of A.hh
_jl_lapack_trtrs('U','N','N', A.hh[1:n,:], qtb)
end

type QRP{T} <: Factorization{T}
type QRPDense{T} <: QRFactorization{T}
hh::Matrix{T}
tau::Vector{T}
jpvt::Vector{Int32}
function QRP(hh::Matrix{T}, tt::Vector{T}, jj::Vector{Int32})
function QRPDense(hh::Matrix{T}, tt::Vector{T}, jj::Vector{Int32})
m, n = size(hh)
numel(tt) == min(m,n) && numel(jj) == n ? new(hh,tt,jj) : error("QRP: mismatched dimensions")
numel(tt) == min(m,n) && numel(jj) == n ? new(hh,tt,jj) : error("QRPDense: mismatched dimensions")
end
end

function QRP(A::Matrix)
hh, tt, jj = _jl_lapack_geqp3(copy(A))
QRP{typeof(A[1])}(hh, tt, jj)
end

## Not sure how to avoid cut-and-paste programming on these.
## Create another abstract type with both QR{T} and QRP{T} as subtypes?
size{T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRP{T}) = size(A.hh)
size{T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRP{T},n) = size(A.hh,n)

function (*){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRP{T},
B::StridedVecOrMat{T})
_jl_lapack_orm2r('L', 'N', A.hh, size(A, 2), A.tau, copy(B))
function qrp{T<:LapackScalar}(A::StridedMatrix{T})
aa, tau, jpvt = _jl_lapack_geqp3(copy(A))
QRPDense{T}(aa, tau, jpvt)
end
qrp{T<:Real}(x::StridedMatrix{T}) = qrp(float64(x))

function Ac_mul_B{T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRP{T},
B::StridedVecOrMat{T})
_jl_lapack_orm2r('L', iscomplex(A.tau) ? 'C' : 'T', A.hh, size(A, 2), A.tau, copy(B))
function factors{T<:LapackScalar}(qrpd::QRPDense{T})
aa, tau = qrpd.hh, qrpd.tau
R = triu(aa[1:min(size(qrpd)),:])
_jl_lapack_orgqr(aa, tau), R, qrpd.jpvt
end

function (\){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRP{T},
# FIXME \
function (\){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRPDense{T},
B::StridedVecOrMat{T})
n = numel(A.tau)
## Replace this with a direct call to _jl_lapack_trtrs to save copying A.hh?
## Actually would need to call the appropriate Lapack subroutine to save copying.
triu(A.hh[1:n,:]) \ (A' * B)[1:n]
end

function (\){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRP{T},
function (\){T<:Union(Float64,Float32,Complex128,Complex64)}(A::QRPDense{T},
B::StridedVecOrMat{T})
## may be better to define one method for B::Vector{T} and another for StridedMatrix
BV = isa(B, Vector)
@@ -288,13 +331,12 @@ type LDLTTridiagonal{T} <: Factorization{T}
D::Vector{T}
E::Vector{T}
end
function LDLTTridiagonal{T<:LapackScalar}(A::Tridiagonal{T})
function ldlt{T<:LapackScalar}(A::Tridiagonal{T})
D = copy(A.d)
E = copy(A.dl)
_jl_lapack_pttrf(D, E)
LDLTTridiagonal(D, E)
LDLTTridiagonal{T}(D, E)
end
LDLT(A::Tridiagonal) = LDLTTridiagonal(A)

(\){T<:LapackScalar}(C::LDLTTridiagonal{T}, B::StridedVecOrMat{T}) =
_jl_lapack_pttrs(C.D, C.E, copy(B))
@@ -304,25 +346,21 @@ type LUTridiagonal{T} <: Factorization{T}
ipiv::Vector{Int32}
function LUTridiagonal(lu::Tridiagonal{T}, ipiv::Vector{Int32})
m, n = size(lu)
m == numel(ipiv) ? new(lu, ipiv) : error("LU: dimension mismatch")
m == numel(ipiv) ? new(lu, ipiv) : error("LUTridiagonal: dimension mismatch")
end
end
show(io, lu::LUTridiagonal) = print(io, "LU decomposition of ", summary(lu.lu))

function LU{T<:LapackScalar}(A::Tridiagonal{T})
function lu{T<:LapackScalar}(A::Tridiagonal{T})
lu, ipiv = _jl_lapack_gttrf(copy(A))
LUTridiagonal{T}(lu, ipiv)
end

function lu(A::Tridiagonal)
error("lu(A) is not defined when A is Tridiagonal. Use LU(A) instead.")
end

function det(lu::LUTridiagonal)
    n = length(lu.ipiv)
    prod(lu.lu.d) * (bool(sum(lu.ipiv .!= 1:n) % 2) ? -1 : 1)
end

det(A::Tridiagonal) = det(LU(A))
det(A::Tridiagonal) = det(lu(A))

(\){T<:LapackScalar}(lu::LUTridiagonal{T}, B::StridedVecOrMat{T}) =
_jl_lapack_gttrs('N', lu.lu, lu.ipiv, copy(B))
2 changes: 2 additions & 0 deletions base/linalg.jl
@@ -23,6 +23,8 @@ triu(M::AbstractMatrix) = triu(M,0)
tril(M::AbstractMatrix) = tril(M,0)
#triu{T}(M::AbstractMatrix{T}, k::Integer)
#tril{T}(M::AbstractMatrix{T}, k::Integer)
triu!(M::AbstractMatrix) = triu!(M,0)
tril!(M::AbstractMatrix) = tril!(M,0)

#diff(a::AbstractVector)
#diff(a::AbstractMatrix, dim::Integer)
20 changes: 20 additions & 0 deletions base/linalg_dense.jl
@@ -264,6 +264,26 @@ triu{T}(M::Matrix{T}, k::Integer) = [ j-i >= k ? M[i,j] : zero(T) for
i=1:size(M,1), j=1:size(M,2) ]
tril{T}(M::Matrix{T}, k::Integer) = [ j-i <= k ? M[i,j] : zero(T) for
i=1:size(M,1), j=1:size(M,2) ]
function triu!{T}(M::Matrix{T}, k::Integer)
    m, n = size(M)
    for i = 1:m
        for j = 1:n
            if j-i < k
                M[i,j] = zero(T)   # zero entries below the k-th superdiagonal
            end
        end
    end
    M
end
function tril!{T}(M::Matrix{T}, k::Integer)
    m, n = size(M)
    for i = 1:m
        for j = 1:n
            if j-i > k
                M[i,j] = zero(T)   # zero entries above the k-th subdiagonal
            end
        end
    end
    M
end

diff(a::Vector) = [ a[i+1] - a[i] for i=1:length(a)-1 ]


17 comments on commit 826c42f

@JeffBezanson (Member)

The stockcorr benchmark uses A*chol(B). How should this be written now?

@StefanKarpinski (Member)

I'm just going by the above changes to test/lapack.jl, but I'd guess you can slap a factors(...) on that puppy and leave it as is. Probably not the fastest way to do it anymore, though.

@timholy (Member, Author) commented on 826c42f Sep 6, 2012

Stefan is right: writing A*factors(chol(B)) works with the code in the repository now. There's no difference in speed from what you're used to, because factors simply gives you a reference to the matrix you've always been working with (well, OK, there's the overhead of looking up a field within a type, but that will go away soon :-) ).

I'm sure this is obvious to you, but for anyone who hasn't read the code over carefully, a bit of explanation is in order: in general, the intention is that these factorizations are usable just like ordinary matrices, and indeed are a stand-in for the original matrix you were decomposing. You can say

QR = qr(A)
x = QR\rhs
xA = A\rhs
@assert norm(x - xA) < sqrt(eps())

and be happy. In the second line the equation is solved using the pre-computed QR decomposition. So from one perspective it's just a missing-method problem: a one-liner defining the product between a CholeskyDense and a StridedVecOrMat.

BUT your question points out what should have been obvious: chol is different from the rest of the decompositions because it conventionally doesn't return a decomposition equal to R'*R (which equals the original matrix); it just returns R. So if I do define that missing method, it will in fact mean something different than it does for all the other decompositions. Hmm. Annoying problem. It's not actually a new problem, because we had Cholesky defined in the tree previously (I renamed it to CholeskyDense), a \ operator was already defined, and the same conceptual problem applied there too.

I guess I see three possible paths:

  1. Given that chol is simply different from all other decompositions, forget about using a type to handle its return and just go back to returning a dense matrix. The disadvantage is that any future matrix type whose Cholesky decomposition can't be best represented by a single output (I don't have a candidate in mind now) will then surface as API breakage. (The closest we have is Tridiagonal, but there the best choice is the ldlt decomposition, so I don't think this really counts as an example yet. As an aside, someday we're going to want the ldlt decomposition for dense matrices; it has some fairly significant accuracy advantages over the Cholesky, aka llt, decomposition.)
  2. Stick with what's in the repository now, and force people to write factor everywhere they want to use R. Currently that's called factors, but again factors suffers from the conceptual problem that it should return both R' and R (a rather pointless exercise). Naming the method factor would at least be conceptually correct.
  3. Define * for CholeskyDense even though the meaning is different.

I kind of think that 3 is the worst of them. I'm personally torn between 1 and 2. Advice?

@timholy (Member, Author) commented on 826c42f Sep 6, 2012

Additions:

  1. Regarding this issue having already been in the tree, I forgot to point out that chol bypassed the Cholesky type, so the most common usage did not suffer from this problem.
  2. There's a fourth option: get rid of the chol function altogether and call it something else. cholfactor? Not ideal, either.

@timholy (Member, Author) commented on 826c42f Sep 6, 2012

OK, much of the above was not well thought out. First, a correction: the \ operator for CholeskyDense was doing the "right" thing, i.e., consistent with all the other decompositions. In other words, if C = chol(A), then C\x is equivalent to A\x except that the decomposition is precomputed.
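To make that concrete, here is a minimal sketch of the semantics just described, using the API in this commit (A and x are placeholder data):

A = randn(5, 5); A = A'*A + 5*eye(5)    # a positive-definite matrix
x = randn(5)
C = chol(A)                             # a CholeskyDense factorization object
@assert norm(C\x - A\x) < sqrt(eps())   # C stands in for A itself
R = factors(C)                          # the triangular factor
@assert norm(R'*R - A) < sqrt(eps())    # R'*R reproduces A; R alone does not equal A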

So the issue is much smaller than I implied. Perhaps the only real problem is the name chosen for factors, which seems to imply that if you multiply the outputs together you'll get the original matrix back. While that's manifestly not true for chol, it's also not true for lu because the final output is a permutation vector.

Should we change it to components or something?

@toivoh (Contributor) commented on 826c42f Sep 6, 2012

I'm beginning to think that it might be better to have different getter functions for different decompositions, e.g.,

L, U, P = lu_factors(lu(A))
R       = cholfactors(chol(A))

Ok... that looks pretty hideous. I guess that lu_factors could be overloaded to work on plain matrices too; then we would almost be back to the old lu, but more verbose... (Or perhaps verbose is good if you want to get people to use factorization objects directly?)

My point is that while L, U, P = lu(A) and R = chol(A) were well-defined operations,
the methods for factors(X) are not the same operation on different types, but different operations on different types.
What if you write a function that expects an LU decomposition, and takes it apart using factors,
but the user supplies a QR decomposition, or a Cholesky?

@timholy (Member, Author) commented on 826c42f Sep 6, 2012

What if you write a function that expects an LU decomposition, and takes it apart using factors,
but the user supplies a QR decomposition, or a Cholesky?

You could write your function as

function myfunc(LU::LUDense)
    ...
end

and that would prevent the problem. In a sense, this illustrates one of the nice things about the new framework: it's actually safer than the old version. Perhaps we should implement an abstract LUDecomposition type, however, so that any matrix representation can be used.
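For instance (a sketch only; the abstract type and method here are hypothetical, not in this commit):

abstract LUDecomposition{T} <: Factorization{T}

# If LUDense{T} and LUTridiagonal{T} were declared as subtypes of LUDecomposition{T},
# one method would accept any LU representation while rejecting QR or Cholesky objects:
lusolve{T}(F::LUDecomposition{T}, b::Vector{T}) = F\b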

I've moved on to a bigger worry, however: the behavior "chol(A)\x is equivalent to A\x" is going to be very surprising to people directly porting code from Matlab! So while we've achieved self-consistency, it's come at the expense of Matlab compatibility, and in an insidious way.

Perhaps rename chol to be cholesky?

@dmbates (Member) commented on 826c42f Sep 6, 2012

I'm coming in a bit late to the discussion and I need to catch up on the big picture. The types that were called LU, QR, QRP and Cholesky are now LUDense, QRDense, ... which is a good thing. It means that the sparse matrix factorizations can be renamed for consistency.

If the intention is that the lu, qr, qrp and chol functions always return a type that specializes Factorization{T}, then the factors extractor must be applied to get Matrix or sparse matrix results. A Factorization{T} is a representation of one or more matrices that can be used, in some way, to reproduce the original matrix. To me the only question is whether you want to use those names, which have a different meaning in Matlab (and in R, for chol and qr at least). I think it is worth the pain for those transitioning.

By the way, I opened issue #1248 three days ago regarding the naming of objects in BLAS and Lapack modules and there haven't been any responses. Any comments from the usual suspects? I think I will just go ahead with a proof of concept implementation and see what others think then.

@StefanKarpinski (Member)

I agree with Doug that it's good to have the returned factorization object behave like the original matrix, with special efficiencies when used in certain operations. Even if chol doesn't traditionally work like that, the consistency is worth it. People who use this sort of thing will appreciate that consistency and be willing to sacrifice a little transitional pain. Then again, I'm not one of those people, so maybe I'm wrong...

@timholy (Member, Author) commented on 826c42f Sep 6, 2012

Great, it sounds like there's (surprisingly?) widespread agreement that this is the right direction; in case it wasn't clear because of my laundry list of issues, I think this is a big win, too. And it's especially nice given that's what we now have in the repository :-). Hmm, except for svd. I propose we do that one, too, although I personally won't get to it for several days or more.

What about guarding against subtle bugs with the name change from chol to cholesky or something? I think this may be the only "dangerous" case of practical importance, because it's the only case I can think of where common usage in other languages would be to collect a single output. For the others (e.g., lu) most of the time 2 or 3 outputs will be collected, and in Julia that will now cause an error (which is far better than a bug). We could define chol in the following way:

chol(A...) = error("You probably want cholesky() instead, but it may behave differently than you expect. Read the documentation.")

If that's not enough warning for people migrating from other languages, I don't know what is.

@nolta (Member) commented on 826c42f Sep 6, 2012

I don't understand why we're making this change. What problem are we solving by breaking chol? If users want this functionality, let them ask for it by writing Cholesky(x).

@timholy (Member, Author) commented on 826c42f Sep 6, 2012

Briefly (gotta run), it's because chol and friends combine an "operation" (performing the decomposition) with a representation (e.g., as a product of a certain number of matrices). As we add specialized matrix types, this gets to be an increasingly untenable situation. I think the original discussion started in pull request #1243.

With this brief response I'm not trying to shut down dissent (I welcome it, and there's been surprisingly little of it so far); I just need to run, and anyway I should give you a chance to read over the previous discussions.

@nolta (Member) commented on 826c42f Sep 7, 2012

Reading #1243, it appears the decision to break Matlab compatibility was taken on a whim.

If we can jettison chol entirely,

We could define chol in the following way:
chol(A...) = error("You probably want cholesky() instead, but it may behave differently than you expect. Read the documentation.")

then we can also retain it in its old form.

@timholy (Member, Author) commented on 826c42f Sep 7, 2012

I might quibble with "whim," since aiming for API consistency is not entirely whimsical. But I too am concerned about breaking Matlab compatibility---it's why I brought my concerns up for discussion. Let's see if we can fix that without breakage or too much uglification.

In a nutshell, here's the problem: different types of matrices may require different representations for various decompositions. For example, for a dense matrix A the Cholesky decomposition is ideally represented in terms of a single matrix output R for which R'*R = A. Similarly, the lu decomposition in Matlab is expressed as L*U = A. One small point (one that does not concern me deeply, but is worth mentioning) is that using complete matrices for both L and U is wasteful; indeed, Lapack internally represents the LU decomposition as a single dense matrix. We split that out into two matrices, which takes a little bit of time (irrelevant compared to the overhead of the decomposition) and space (which might be important for people pushing the limits of their machines).
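In code terms (a sketch against this commit's API):

F = lu(A)             # LUDense: the packed Lapack matrix plus a pivot vector
L, U, P = factors(F)  # splits the packed storage into separate matrices, paying the copying cost described above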

More importantly, consider tridiagonal matrices. The Cholesky decomposition is best represented as two vectors, so a low-level routine would look like d, e = chol!(d, e). Obviously, this is a different API. Similarly, the LU decomposition is represented in Lapack using 4 vectors. This creates problems if you want to write routines that take a matrix as an input, but don't want to have to write specialized versions of your whole routine for each possible representation of a matrix.
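Here is how that looks with the code in this commit (a sketch; the Tridiagonal(dl, d, du) constructor is assumed, and the data are placeholders):

dl = -ones(9); d = 4*ones(10); x = randn(10)   # diagonally dominant, so positive-definite
A = Tridiagonal(dl, d, dl)                     # symmetric tridiagonal matrix
F = ldlt(A)                  # an LDLTTridiagonal: internally just two vectors, D and E
y = F\x                      # solves A*y = x using the precomputed factorization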

So the idea is to abstract this operation. Given one matrix input (where that matrix might be of any type; I suspect Tridiagonal is only the beginning of our specialized types, see #1248), a decomposition is represented as a single output (e.g., of type LUTridiagonal). Then you're guaranteed to be able to capture the result with a single output, and internally the result can use the most performant/compact/Lapack-compatible representation of the decomposition possible.

Now, let's consider LU decomposition in more detail: the combined output (which stores both L and U) is a representation of the original matrix, and it's sensible that it should therefore behave as the original matrix A when, for example, you use it to solve equations:

ludcmp = lu(A)
y = ludcmp\x

But this creates the conundrum we're discussing now (and it's fair to say I didn't foresee the problems this would cause specifically for chol). For consistency with the other decompositions, in choldcmp = chol(A), choldcmp should behave as A, not the "square root" of A, because for positive-definite matrices

choldcmp = chol(A)
y = choldcmp\x    # Matlab version: R = chol(A); y = R\(R'\x);

is a great choice for solving equations (approximately twice as efficient as LU decomposition). However, in Matlab syntax chol returns R, the "square root" of A. This introduces the API consistency problem.

For most decompositions, breaking Matlab compatibility may not be a big problem because nothing else takes a single output: people who port their code will get an error, and that gives them the opportunity to fix the problem. But the common usage of chol is to collect a single output, and hence this introduces a real concern that people will be bitten by bugs rather than errors. If Matlab returned [R, Rprime] = chol(A) and people always collected both (of course that would be weird), we might not be having this discussion.
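To spell out the insidious case (a sketch; A and b stand for whatever a ported script uses):

R = chol(A)   # a Matlab user expects the triangular factor R here
z = R\b       # Matlab: solves the triangular system R*z == b
              # Julia, after this commit: solves A*z == b -- no error, just a different answer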

One compromise position is to keep Matlab compatibility for dense matrices for chol and likely all other decompositions, and then provide routines with alternate names that behave more sensibly over a wider range of matrix types. That would effectively revert this patch. It's a bit ugly to have two different APIs for matrix decomposition, and it's not the advice I got in #1243 (which led to this commit). But I'm willing to be talked out of it, especially because I think none of us clearly foresaw the potential for problems with chol (speaking for myself, anyway). If you like this route, please do suggest some specifics, e.g., how the various functions should be named.

When I did my squash, I deliberately kept this patch out of it; glad I did!

@timholy (Member, Author) commented on 826c42f Sep 7, 2012

Hmm, rather than revert, a much smarter approach would be to build the Matlab-compatible syntax on top of the current (new) infrastructure:

lu(A::StridedMatrix) = factors(lud(A))
chol(A::StridedMatrix) = factors(chold(A))
qr(A::StridedMatrix) = factors(qrd(A))
qrp(A::StridedMatrix) = factors(qrpd(A))

This restores Matlab compatibility with only 4 lines (5 if we add svd), and keeps all the serious code together inside a better API. Supplying something other than a StridedMatrix would likely give an error for lu but not for lud; hopefully via documentation users will discover lud for the broader array of matrix types (or we could define functions that give an error pointing them in the right direction).
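Usage would then look something like this (lud being one of the hypothetical d-suffixed names proposed above):

l, u, p = lu(A)   # Matlab-compatible: the factors unpacked into multiple outputs
F = lud(A)        # new API: an opaque factorization object
y = F\b           # reuses the precomputed decomposition for solving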

Personally I like this strategy. Perhaps I should have thought of this at the outset, but I suppose we arrived at this place by a fairly roundabout path. Any objections? If none, then...names? Do you like the convention of just tacking d on the end? And yes/no to the name factors? Alternatives might be components or parts; one reason to consider a change is that the pivot vector is not really a factor. But factors is also fairly appropriate. I see advantages either way.

@nolta (Member) commented on 826c42f Sep 7, 2012

Hmm, rather than revert, a much smarter approach would be to build the Matlab-compatible syntax on top of the current (new) infrastructure: ...

Great, I think we're converging. I'm fine with this approach.

Do you like the convention of just tacking d on the end?

Personally, I like LongMathematicaStyleNames, but it's a mild preference, and I gather EveryoneElseHatesThem.

And yes/no to the name factors? Alternatives might be components or parts; one reason to consider a change is that the pivot vector is not really a factor.

I don't know. Part of me wants individual getters, e.g., p = pivot(lud(x)), while another part thinks the factorizations should be opaque, i.e., get rid of factors.

@timholy (Member, Author) commented on 826c42f Sep 7, 2012

Great, I think we're converging. I'm fine with this approach.

Very good. I'll give 24 hours or so to see if any objections roll in.

while another part thinks the factorizations should be opaque, i.e., get rid of factors.

In general yes, this new framework allows decompositions to typically remain opaque to everyone except the routines written to use them. But if we don't have factors (or whatever it gets called), we won't have the Matlab-compatible syntax (which is decidedly non-opaque).
