diff --git a/src/mfccs.jl b/src/mfccs.jl index 9878a15..c07c7ae 100644 --- a/src/mfccs.jl +++ b/src/mfccs.jl @@ -12,14 +12,15 @@ using DSP ## Recoded from rastamat's "melfcc.m" (c) Dan Ellis. ## Defaults here are HTK parameters, this is contrary to melfcc function mfcc(x::Vector{T}, sr::Real=16000.0; wintime=0.025, steptime=0.01, numcep=13, - lifterexp=-22, sumpower=false, preemph=0.97, dither=false, minfreq=0.0, maxfreq=sr/2, - nbands=20, bwidth=1.0, dcttype=3, fbtype=:htkmel, - usecmp=false, modelorder=0) where {T<:AbstractFloat} - if (preemph != 0) + lifterexp=-22, preemph=0.97, minfreq=0.0, maxfreq=sr/2, nbands=20, + bwidth=1.0, dcttype=3, fbtype=:htkmel, modelorder=0, sumpower::Bool=false, + dither::Bool=false, usecmp::Bool=false) where {T<:AbstractFloat} + if !iszero(preemph) x = filt(PolynomialRatio([1., -preemph], [1.]), x) end - pspec = powspec(x, sr, wintime=wintime, steptime=steptime, dither=dither) - aspec = audspec(pspec, sr, nfilts=nbands, fbtype=fbtype, minfreq=minfreq, maxfreq=maxfreq, sumpower=sumpower, bwidth=bwidth) + pspec = powspec(x, sr; wintime=wintime, steptime=steptime, dither=dither) + aspec = audspec(pspec, sr; nfilts=nbands, fbtype=fbtype, minfreq=minfreq, + maxfreq=maxfreq, sumpower=sumpower, bwidth=bwidth) if usecmp # PLP-like weighting/compression aspec = postaud(aspec, maxfreq, fbtype) @@ -44,12 +45,13 @@ function mfcc(x::Vector{T}, sr::Real=16000.0; wintime=0.025, steptime=0.01, numc return (cepstra, pspec', meta) end -mfcc(x::Array{T}, sr::Real=16000.0; args...) where {T<:AbstractFloat} = @distributed (tuple) for i=1:size(x)[2] mfcc(x[:,i], sr; args...) end +mfcc(x::AbstractVector{<:AbstractFloat}, sr::Real=16000.0; args...) = mfcc(Vector(x), sr; args...) +mfcc(x::AbstractMatrix{<:AbstractFloat}, sr::Real=16000.0; args...) = @distributed (tuple) for i in axes(x, 2) mfcc(x[:, i], sr; args...) end ## default feature configurations, :rasta, :htk, :spkid_toolkit, :wbspeaker ## With optional extra agrs... you can specify more options -function mfcc(x::Vector{T}, sr::AbstractFloat, defaults::Symbol; args...) where {T<:AbstractFloat} +function mfcc(x::AbstractVector{<:AbstractFloat}, sr::AbstractFloat, defaults::Symbol; args...) if defaults == :rasta mfcc(x, sr; lifterexp=0.6, sumpower=true, nbands=40, dcttype=2, fbtype=:mel, args...) elseif defaults ∈ [:spkid_toolkit, :nbspeaker] @@ -64,48 +66,45 @@ function mfcc(x::Vector{T}, sr::AbstractFloat, defaults::Symbol; args...) where end ## our features run down with time, this is essential for the use of DSP.filt() -function deltas(x::Matrix{T}, w::Int=9) where {T<:AbstractFloat} - (nr, nc) = size(x) - if nr == 0 || w <= 1 +function deltas(x::AbstractMatrix{T}, w::Int=9) where {T<:AbstractFloat} + nr, nc = size(x) + if iszero(nr) || w <= 1 return x end hlen = floor(Int, w/2) w = 2hlen + 1 # make w odd win = collect(convert(T, hlen):-1:-hlen) - x1 = reshape(x[1, :], 1, size(x, 2)) ## julia v0.4 v0.5 changeover - xend = reshape(x[end,:], 1, size(x, 2)) - xx = vcat(repeat(x1, hlen, 1), x, repeat(xend, hlen, 1)) ## take care of boundaries + x1 = x[1:1, :] + xend = x[end:end,:] + xx = vcat(repeat(x1, hlen), x, repeat(xend, hlen)) ## take care of boundaries norm = 3 / (hlen * w * (hlen + 1)) - return norm * filt(PolynomialRatio(win, [1.]), xx)[2hlen .+ (1:nr), :] + delta_v = filt(PolynomialRatio(win, [1.]), xx)[2hlen .+ (1:nr), :] + @. delta_v *= norm + return delta_v end +sortperm_along(a::AbstractArray, dim::Int) = (v=similar(a, Int, size(a, dim)); mapslices(x->sortperm!(v, x), a; dims=dim)) -import Base.Sort.sortperm -sortperm(a::Array, dim::Int) = mapslices(sortperm, a, dims=dim) - -erfinvtabs = Dict{Int,Vector{Float64}}() +erfinvtabs = Dict{Int, Vector{Float64}}() function erfinvtab(wl::Int) global erfinvtabs - if wl ∉ keys(erfinvtabs) - erfinvtabs[wl] = √2 * erfinv.(2collect(1:wl) / (wl + 1) .- 1) + if !haskey(erfinvtabs, wl) + erfinvtabs[wl] = @. √2 * erfinv(2(1:wl) / (wl + 1) - 1) end return erfinvtabs[wl] end -function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real} - nx, nfea = size(x) +function warpstats(x::AbstractMatrix{<:Real}, w::Int=399) + nx = size(x, 1) wl = min(w, nx) - hw = (wl+1) / 2 - rank = similar(x, Int) if nx < w - index = sortperm(x, 1) - for j in 1:nfea - rank[index[:,j], j] = collect(1:nx) - end + rank = sortperm_along(x, 1) else - for j in 1:nfea # operations in outer loop over columns, better for memory cache + rank = similar(x, Int) + hw = round(Int, (wl+1) / 2) + for j in axes(x, 2) # operations in outer loop over columns, better for memory cache for i in 1:nx - s = max(1, i - round(Int, hw) + 1) + s = max(1, i - hw + 1) e = s + w - 1 if e > nx d = e - nx @@ -114,7 +113,9 @@ function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real} end r = 1 for k in s:e - r += x[i, j] > x[k, j] + if x[i, j] > x[k, j] + r += 1 + end end rank[i, j] = r end @@ -123,20 +124,25 @@ function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real} return rank, erfinvtab(wl) end -function warp(x::Matrix{T}, w::Int=399) where {T<:Real} +function warp(x::AbstractMatrix{<:Real}, w::Int=399) rank, erfinvtab = warpstats(x, w) return erfinvtab[rank] end -warp(x::Vector{T}, w::Int=399) where {T<:Real} = warp(x'', w) +warp(x::AbstractVector{<:Real}, w::Int=399) = warp(reshape(x, :, 1), w) -function WarpedArray(x::Matrix, w::Int) +function WarpedArray(x::AbstractMatrix{<:Real}, w::Int) rank, erfinvtab = warpstats(x, w) WarpedArray(rank, erfinvtab) end ## mean and variance normalization -znorm(x::Array, dim::Int=1) = znorm!(copy(x), dim) -znorm!(x::Array, dim::Int=1) = broadcast!(/, x, broadcast!(-, x, x, mean(x, dims=dim)), std(x, dims=dim)) +znorm(x::AbstractArray, dim::Int=1) = znorm!(copy(x), dim) +function znorm!(x::AbstractArray, dim::Int=1) + mean_x = mean(x; dims=dim) + std_x = std(x; dims=dim) + @. x = (x - mean_x) / std_x + x +end ## short-term mean and variance normalization function stmvn(x::Vector, w::Int=399) @@ -170,15 +176,15 @@ end stmvn(m::Matrix, w::Int=399, dim::Int=1) = mapslices(x->stmvn(x, w), m, dims=dim) ## Shifted Delta Coefficients by copying -function sdc(x::Matrix{T}, n::Int=7, d::Int=1, p::Int=3, k::Int=7) where {T<:AbstractFloat} +function sdc(x::AbstractMatrix{T}, n::Int=7, d::Int=1, p::Int=3, k::Int=7) where {T<:AbstractFloat} nx, nfea = size(x) n ≤ nfea || error("Not enough features for n") hnfill = (k-1) * p / 2 nfill1, nfill2 = floor(Int, hnfill), ceil(Int, hnfill) - xx = vcat(zeros(eltype(x), nfill1, n), deltas(x[:,1:n], 2d+1), zeros(eltype(x), nfill2, n)) - y = zeros(eltype(x), nx, n*k) + xx = vcat(zeros(T, nfill1, n), deltas(x[:,1:n], 2d+1), zeros(T, nfill2, n)) + y = zeros(T, nx, n*k) for (dt, offset) in zip(0:p:p*k-1, 0:n:n*k-1) - y[:, offset+(1:n)] = xx[dt+(1:nx), :] + y[:, offset.+(1:n)] = xx[dt.+(1:nx), :] end return y end diff --git a/src/rasta.jl b/src/rasta.jl index d9eb3c4..d83df62 100644 --- a/src/rasta.jl +++ b/src/rasta.jl @@ -12,11 +12,11 @@ using DSP using FFTW using LinearAlgebra -function powspec(x::Vector{T}, sr::Real=8000.0; wintime=0.025, steptime=0.01, dither=true) where {T<:AbstractFloat} +function powspec(x::AbstractVector{<:AbstractFloat}, sr::Real=8000.0; wintime=0.025, steptime=0.01, dither=true) nwin = round(Integer, wintime * sr) nstep = round(Integer, steptime * sr) - nfft = 2 .^ Integer((ceil(log2(nwin)))) + nfft = nextpow(2, nwin) window = hamming(nwin) # overrule default in specgram which is hanning in Octave noverlap = nwin - nstep @@ -29,8 +29,9 @@ function powspec(x::Vector{T}, sr::Real=8000.0; wintime=0.025, steptime=0.01, di end # audspec tested against octave with simple vectors for all fbtypes -function audspec(x::Matrix{T}, sr::Real=16000.0; nfilts=ceil(Int, hz2bark(sr/2)), fbtype=:bark, - minfreq=0., maxfreq=sr/2, sumpower=true, bwidth=1.0) where {T<:AbstractFloat} +function audspec(x::AbstractMatrix{<:AbstractFloat}, sr::Real=16000.0; + nfilts=ceil(Int, hz2bark(sr / 2)), fbtype=:bark, + minfreq=0., maxfreq=sr/2, sumpower::Bool=true, bwidth=1.0) nfreqs, nframes = size(x) nfft = 2(nfreqs-1) if fbtype == :bark @@ -75,40 +76,49 @@ end hz2bark(f) = 6 * asinh(f / 600) bark2hz(bark) = 600 * sinh(bark / 6) -function fft2melmx(nfft::Int, nfilts::Int; sr=8000.0, width=1.0, minfreq=0.0, maxfreq=sr/2, htkmel=false, constamp=false) +function slope_gen(fs, fftfreq) + f1, f2, f3 = fs + # lower and upper slopes for all bins + loslope = (fftfreq - f1) / (f2 - f1) + hislope = (f3 - fftfreq) / (f3 - f2) + # then intersect them with each other and zero + max(0, min(loslope, hislope)) +end + +function fft2melmx(nfft::Int, nfilts::Int; sr=8000.0, width=1.0, minfreq=0.0, + maxfreq=sr/2, htkmel::Bool=false, constamp::Bool=false) wts = zeros(nfilts, nfft) + lastind = (nfft>>1) # Center freqs of each DFT bin - fftfreqs = collect(0:nfft-1) / nfft * sr; + fftfreqs = collect(0:lastind-1) / nfft * sr; # 'Center freqs' of mel bands - uniformly spaced between limits minmel = hz2mel(minfreq, htkmel); maxmel = hz2mel(maxfreq, htkmel); - binfreqs = mel2hz(minmel .+ collect(0:(nfilts+1)) / (nfilts + 1) * (maxmel - minmel), htkmel); + binfreqs = @. mel2hz(minmel + (0:(nfilts+1)) / (nfilts + 1) * (maxmel - minmel), htkmel); ## binbin = iround(binfrqs/sr*(nfft-1)); for i in 1:nfilts - fs = binfreqs[i .+ (0:2)] + fs = binfreqs[i], binfreqs[i+1], binfreqs[i+2] # scale by width - fs = fs[2] .+ (fs .- fs[2])width - # lower and upper slopes for all bins - loslope = (fftfreqs .- fs[1]) / (fs[2] - fs[1]) - hislope = (fs[3] .- fftfreqs) / (fs[3] - fs[2]) - # then intersect them with each other and zero - wts[i,:] = max.(0, min.(loslope, hislope)) + fs = @. fs[2] + (fs - fs[2])width + for j in eachindex(fftfreqs) + wts[i, j] = slope_gen(fs, fftfreqs[j]) + end end if !constamp ## unclear what this does... ## Slaney-style mel is scaled to be approx constant E per channel - wts = broadcast(*, 2 ./ ((binfreqs[3:nfilts+2]) - binfreqs[1:nfilts]), wts) + @. wts = 2 / ((binfreqs[3:nfilts+2]) - binfreqs[1:nfilts]) * wts + # Make sure 2nd half of DFT is zero + wts[:, lastind+1:nfft] .= 0. end - # Make sure 2nd half of DFT is zero - wts[:, (nfft>>1)+1:nfft] .= 0. return wts end -function hz2mel(f::Vector{T}, htk=false) where {T<:AbstractFloat} +function hz2mel(f::AbstractVector{<:AbstractFloat}, htk::Bool=false) if htk - return 2595 .* log10.(1 .+ f / 700) + return @. 2595 * log10(1 + f / 700) else f0 = 0.0 fsp = 200 / 3 @@ -116,40 +126,38 @@ function hz2mel(f::Vector{T}, htk=false) where {T<:AbstractFloat} brkpt = (brkfrq - f0) / fsp logstep = exp(log(6.4) / 27) linpts = f .< brkfrq - z = zeros(size(f)) # prevent InexactError() by making these Float64 - z[findall(linpts)] = f[findall(linpts)]/brkfrq ./ log(logstep) - z[findall(.!linpts)] = brkpt .+ log.(f[findall(.!linpts)] / brkfrq) ./ log(logstep) + z = zeros(axes(f)) # prevent InexactError() by making these Float64 + @. z[linpts] = f[linpts] / brkfrq / log(logstep) + @. z[!linpts] = brkpt + log(f[!linpts] / brkfrq) / log(logstep) end return z end -hz2mel(f::AbstractFloat, htk=false) = hz2mel([f], htk)[1] +hz2mel(f::AbstractFloat, htk::Bool=false) = hz2mel([f], htk)[1] -function mel2hz(z::Vector{T}, htk=false) where {T<:AbstractFloat} +function mel2hz(z::AbstractFloat, htk::Bool=false) if htk - f = 700 .* (10 .^ (z ./ 2595) .- 1) + f = @. 700 * (10 ^ (z / 2595) - 1) else f0 = 0.0 fsp = 200 / 3 brkfrq = 1000.0 brkpt = (brkfrq - f0) / fsp logstep = exp(log(6.4) / 27) - linpts = z .< brkpt - f = similar(z) - f[linpts] = f0 .+ fsp * z[linpts] - f[.!linpts] = brkfrq .* exp.(log.(logstep) * (z[.!linpts] .- brkpt)) + linpt = z < brkpt + f = linpt ? f0 + fsp * z : brkfrq * exp(log(logstep) * (z - brkpt)) end return f end function postaud(x::Matrix{T}, fmax::Real, fbtype=:bark, broaden=false) where {T<:AbstractFloat} - (nbands,nframes) = size(x) + nbands, nframes = size(x) nfpts = nbands + 2broaden if fbtype == :bark bandcfhz = bark2hz(range(0, stop=hz2bark(fmax), length=nfpts)) elseif fbtype == :mel - bandcfhz = mel2hz(range(0, stop=hz2mel(fmax), length=nfpts)) + bandcfhz = mel2hz.(range(0, stop=hz2mel(fmax), length=nfpts)) elseif fbtype == :htkmel || fbtype == :fcmel - bandcfhz = mel2hz(range(0, stop=hz2mel(fmax,1), length=nfpts),1); + bandcfhz = mel2hz.(range(0, stop=hz2mel(fmax, true), length=nfpts),1); else error("Unknown filterbank type") end @@ -158,11 +166,11 @@ function postaud(x::Matrix{T}, fmax::Real, fbtype=:bark, broaden=false) where {T # Hynek's magic equal-loudness-curve formula fsq = bandcfhz.^2 ftmp = fsq + 1.6e5 - eql = ((fsq ./ ftmp).^2) .* ((fsq + 1.44e6) ./ (fsq + 9.61e6)) + eql = @. ((fsq / ftmp)^2) * ((fsq + 1.44e6) / (fsq + 9.61e6)) # weight the critical bands - z = broadcast(*, eql, x) + z = eql .* x # cube root compress - z .^= 0.33 + @. z = cbrt(z) # replicate first and last band (because they are unreliable as calculated) if broaden z = z[[1, 1:nbands, nbands], :]; @@ -203,55 +211,57 @@ function lpc2cep(a::Array{T}, ncep::Int=0) where {T<:AbstractFloat} return c end -function spec2cep(spec::Array{T}, ncep::Int=13, dcttype::Int=2) where {T<:AbstractFloat} +function spec2cep(spec::AbstractMatrix{<:AbstractFloat}, ncep::Int=13, dcttype::Int=2) # no discrete cosine transform option - dcttype == -1 && return log(spec) + dcttype == -1 && return log.(spec) nr, nc = size(spec) dctm = similar(spec, ncep, nr) if 1 < dcttype < 4 # type 2,3 - for i in 1:ncep - dctm[i, :] = cos.((i - 1) * collect(1:2:2nr-1)π / (2nr)) * √(2/nr) + for j in 1:nr, i in 1:ncep + dctm[i, j] = cospi((i - 1) * (2j-1) / (2nr)) * √(2/nr) end if dcttype == 2 - dctm[1, :] ./= √2 + @. dctm[1, :] /= √2 + end + elseif dcttype == 4 # type 4 + for j in 1:nr, i in 1:ncep + dctm[i, j] = 2cospi((i-1) * j/(nr+1)) end - elseif dcttype == 4 # type 4 - for i in 1:ncep - dctm[i, :] = 2cos.((i-1) * collect(1:nr)π/(nr+1)) - dctm[i, 1] += 1 - dctm[i, nr] += (-1)^(i-1) + for i in axes(dctm, 1) + dctm[i, end] += (-1)^(i-1) end - dctm /= 2(nr + 1) + @. dctm[:, 1] += 1 + @. dctm /= 2(nr + 1) else # type 1 - for i in 1:ncep - dctm[i, :] = cos.((i-1) * collect(0:nr-1)π ./ (nr - 1)) ./ (nr - 1) + for j in 1:nr, i in 1:ncep + dctm[i, j] = cospi((i-1) * (j-1) / (nr - 1)) / (nr - 1) end - dctm[:, [1, nr]] /= 2 + @. dctm[:, [1, nr]] /= 2 end - return dctm * log.(spec) + # assume spec is not reused + return dctm * map!(log, spec, spec) end -function lifter(x::Array{T}, lift::Real=0.6, invs=false) where {T<:AbstractFloat} - (ncep, nf) = size(x) - if lift == 0 +function lifter(x::AbstractArray{<:AbstractFloat}, lift::Real=0.6, invs=false) + ncep, nf = size(x) + if iszero(lift) return x - end - if lift > 0 + elseif lift > 0 if lift > 10 - error("Too high lift number") + error("Lift number is too high (>10)") end - liftw = [1; collect(1:ncep-1).^lift] + liftw = pushfirst!((1:ncep-1).^lift, 1) else # Hack to support HTK liftering if !isa(lift, Integer) error("Negative lift must be interger") end lift = -lift # strictly speaking unnecessary... - liftw = vcat(1, (1 .+ lift/2 * sin.(collect(1:ncep-1)π / lift))) + liftw = @. 1 + lift / 2 * sinpi((0:ncep-1) / lift) end if invs - liftw = 1 ./ liftw + @. liftw = inv(liftw) end y = broadcast(*, x, liftw) return y @@ -292,17 +302,17 @@ function levinson(acf::Vector{T}, p::Int) where {T<:Real} end pushfirst!(a, 1) end - return (a, v) + return a, v end function levinson(acf::Array{T}, p::Int) where {T<:Real} - (nr,nc) = size(acf) + nr, nc = size(acf) a = zeros(p + 1, nc) v = zeros(p + 1, nc) for i in 1:nc a[:,i], v[:,i] = levinson(acf[:,i], p) end - return (a, v) + return a, v end ## Freely after octave's implementation, ver 3.2.4, by jwe && jh @@ -310,8 +320,8 @@ end function toeplitz(c::Vector{T}, r::Vector{T}=c) where {T<:Real} nc = length(r) nr = length(c) - res = zeros(typeof(c[1]), nr, nc) - if nc == 0 || nr == 0 + res = zeros(T, nr, nc) + if iszero(nc) || iszero(nr) return res end if r[1] != c[1] @@ -319,7 +329,6 @@ function toeplitz(c::Vector{T}, r::Vector{T}=c) where {T<:Real} end if typeof(c) <: Complex conj!(c) - c[1] = conj(c[1]) # bug in julia? end ## if issparse(c) && ispsparse(r) data = [r[end:-1:2], c]