diff --git a/src/feacalc.jl b/src/feacalc.jl index 40f2a30..9a459a6 100644 --- a/src/feacalc.jl +++ b/src/feacalc.jl @@ -3,38 +3,42 @@ ## Feacalc. Feature calculation as used for speaker and language recognition. -nrow(x) = size(x,1) -ncol(x) = size(x,2) +nrow(x) = size(x, 1) +ncol(x) = size(x, 2) -## compute features according to standard settingsm directly from a wav file. -## this does channel at the time. +""" +compute features according to standard settings directly from a wav file. +this does channel at the time. +""" function feacalc(wavfile::AbstractString; method=:wav, kwargs...) - if method == :wav - (x, sr) = wavread(wavfile) + x, sr = if method == :wav + wavread(wavfile) elseif method == :sox - (x, sr) = soxread(wavfile) + soxread(wavfile) elseif method == :sphere - (x, sr) = sphread(wavfile) + sphread(wavfile) end sr = Float64(sr) # more reasonable sr feacalc(x; sr=sr, source=wavfile, kwargs...) end ## assume we have an array already -function feacalc(x::Array; augtype=:ddelta, normtype=:warp, sadtype=:energy, dynrange::Real=30., nwarp::Int=399, chan=:mono, sr::AbstractFloat=8000.0, source=":array", defaults=:nbspeaker, mfccargs...) +function feacalc(x::AbstractArray; augtype=:ddelta, normtype=:warp, sadtype=:energy, + dynrange::Real=30., nwarp::Int=399, chan=:mono, sr::AbstractFloat=8000.0, + source=":array", defaults=:nbspeaker, mfccargs...) if ndims(x) > 1 nsamples, nchan = size(x) if chan == :mono - x = vec(mean(x, dims=2)) # averave multiple channels for now - elseif in(chan, [:a, :b]) - channum = findall(in([chan]), [:a, :b]) - x = vec(x[:,channum]) - elseif isa(chan, Integer) + x = vec(mean(x; dims=2)) # average multiple channels for now + elseif chan in (:a, :b) + channum = chan == :a ? 1 : 2 + x = x[:,channum] + elseif chan isa Integer if !(chan in 1:nchan) error("Bad channel specification: ", chan) end - x = vec(x[:,chan]) - chan=[:a, :b][chan] + x = x[:,chan] + chan = (:a, :b)[chan] else error("Unknown channel specification: ", chan) end @@ -42,17 +46,17 @@ function feacalc(x::Array; augtype=:ddelta, normtype=:warp, sadtype=:energy, dyn nsamples, nchan = length(x), 1 end ## save some metadata - meta = Dict("nsamples" => nsamples, "sr" => sr, "source" => source, "nchan" => nchan, - "chan" => chan) + meta = Dict("nsamples" => nsamples, "sr" => sr, "source" => source, + "nchan" => nchan, "chan" => chan) preemph = 0.97 preemph ^= 16000. / sr ## basic features - (m, pspec, params) = mfcc(x, sr, defaults; preemph=preemph, mfccargs...) + m, pspec, params = mfcc(x, sr, defaults; preemph=preemph, mfccargs...) meta["totnframes"] = nrow(m) ## augment features - if augtype==:delta || augtype==:ddelta + if augtype in (:delta, :ddelta) d = deltas(m) if augtype==:ddelta dd = deltas(d) @@ -67,14 +71,7 @@ function feacalc(x::Array; augtype=:ddelta, normtype=:warp, sadtype=:energy, dyn if nrow(m)>0 if sadtype==:energy - ## integrate power - deltaf = size(pspec,2) / (sr/2) - minfreqi = round(Int, 300deltaf) - maxfreqi = round(Int, 4000deltaf) - power = 10log10(sum(pspec[:,minfreqi:maxfreqi], 2)) - - maxpow = maximum(power) - speech = findall(power .> maxpow - dynrange) + speech = sad(pspec, sr; dynrange=dynrange) params["dynrange"] = dynrange elseif sadtype==:none speech = collect(1:nrow(m)) @@ -86,14 +83,14 @@ function feacalc(x::Array; augtype=:ddelta, normtype=:warp, sadtype=:energy, dyn ## perform SAD m = m[speech,:] meta["speech"] = map(UInt32, speech) - meta["nframes"] , meta["nfea"] = size(m) + meta["nframes"], meta["nfea"] = size(m) ## normalization - if nrow(m)>0 - if normtype==:warp + if !iszero(nrow(m)) + if normtype == :warp m = warp(m, nwarp) params["warp"] = nwarp # the default - elseif normtype==:mvn + elseif normtype == :mvn if nrow(m)>1 znorm!(m,1) else @@ -102,60 +99,66 @@ function feacalc(x::Array; augtype=:ddelta, normtype=:warp, sadtype=:energy, dyn end meta["normtype"] = normtype end - return(map(Float32, m), meta, params) + return (map(Float32, m), meta, params) end ## When called with a specific application in mind, call with two arguments function feacalc(wavfile::AbstractString, application::Symbol; kwargs...) - if (application in [:speaker, :nbspeaker]) + if application in (:speaker, :nbspeaker) feacalc(wavfile; defaults=:nbspeaker, kwargs...) elseif application==:wbspeaker feacalc(wavfile; defaults=:wbspeaker, kwargs...) - elseif (application==:language) + elseif application == :language feacalc(wavfile; defaults=:rasta, nwarp=299, augtype=:sdc, kwargs...) - elseif (application==:diarization) + elseif application == :diarization feacalc(wavfile; defaults=:rasta, sadtype=:none, normtype=:mvn, augtype=:none, kwargs...) else error("Unknown application ", application) end end -function sad(pspec::Matrix{Float64}, sr::Float64, method=:energy; dynrange::Float64=30.) - deltaf = size(pspec,2) / (sr/2) +function sad(pspec::AbstractMatrix{T}, sr::T, method=:energy; dynrange::T=30.) where T<:Float64 + ## integrate power + deltaf = size(pspec, 2) / (sr/2) minfreqi = round(Int, 300deltaf) maxfreqi = round(Int, 4000deltaf) - power = 10log10(sum(pspec[:,minfreqi:maxfreqi], 2)) + summed_pspec = vec(sum(view(pspec, :, minfreqi:maxfreqi); dims=2)) + power = @. 10log10(summed_pspec) maxpow = maximum(power) - speech = findall(power .> maxpow - dynrange) + activity = power .> maxpow - dynrange + speech = findall(activity) + return speech end ## listen to SAD function sad(wavfile::AbstractString, speechout::AbstractString, silout::AbstractString; dynrange::Float64=30.) - (x, sr, nbits) = wavread(wavfile) - sr = float64(sr) # more reasonable sr - x = vec(mean(x, dims=2)) # averave multiple channels for now - (m, pspec, meta) = mfcc(x, sr; preemph=0) + x, sr, nbits = wavread(wavfile) + sr = Float64(sr) # more reasonable sr + mx::Vector{Float64} = vec(mean(x; dims=2)) # average multiple channels for now + m, pspec, meta = mfcc(mx, sr; preemph=0) sp = sad(pspec, sr, dynrange=dynrange) sl = round(Int, meta["steptime"] * sr) - xi = falses(size(x)) - for i in sp - xi[(i-1)*sl+(1:sl)] = true + xi = falses(axes(mx)) + for i in @view sp[begin:end-1] + z = (i - 1) * sl + @. xi[z+1:z+sl] = true end - y = x[findall(xi)] - wavwrite(y, speechout, Fs=sr, nbits=nbits, compression=WAVE_FORMAT_PCM) - y = x[findall(.!xi)] - wavwrite(y, silout, Fs=sr, nbits=nbits, compression=WAVE_FORMAT_PCM) + z = (sp[end] - 1) * sl + xi[z+1:min(z+sl, lastindex(mx))] .= true + wavwrite(mx[xi], speechout, Fs=sr, nbits=nbits, compression=WAVE_FORMAT_PCM) + wavwrite(mx[.!xi], silout, Fs=sr, nbits=nbits, compression=WAVE_FORMAT_PCM) end ## this should probably be called soxread... function soxread(file) - nch = parse(Int, readall(`soxi -c $file`)) - sr = parse(i=Int, readall(`soxi -r $file`)) + nch = parse(Int, read(`soxi -c $file`, String)) + sr = parse(Int, read(`soxi -r $file`, String)) sox = `sox $file -t raw -e signed -r $sr -b 16 -` - fd, proc = open(sox, "r") x = Int16[] - while !eof(fd) - push!(x, read(fd, Int16)) + open(sox, "r") do fd, proc + while !eof(fd) + push!(x, read(fd, Int16)) + end end ns = length(x) ÷ nch reshape(x, nch, ns)' / (1<<15), sr diff --git a/src/io.jl b/src/io.jl index d09ec3b..c53a1f2 100644 --- a/src/io.jl +++ b/src/io.jl @@ -17,20 +17,20 @@ HDF5.write(fd::HDF5.File, s::AbstractString, ss::SubString) = write(fd, s, ascii ## x: the MFCC data ## meta: a dict with parameters anout the data, nsamples, nframes, totnframes (before sad), ... ## params: a dict with parameters about the feature extraction itself. -function feasave(file::AbstractString, x::Matrix{T}; meta::Dict=Dict(), params::Dict=Dict()) where {T<:AbstractFloat} +function feasave(file::AbstractString, x::AbstractMatrix{<:AbstractFloat}; meta::Dict=Dict(), params::Dict=Dict()) dir = dirname(file) - if length(dir)>0 && !isdir(dir) + if !(isempty(dir) || isdir(dir)) mkpath(dir) end - fd = HDF5.h5open(file, "w") - fd["features/data"] = map(Float32, x) - for (k,v) in meta - fd[string("features/meta/", k)] = v - end - for (k,v) in params - fd[string("features/params/", k)] = v + HDF5.h5open(file, "w") do fd + fd["features/data"] = map(Float32, x) + for (k, v) in meta + fd[string("features/meta/", k)] = v + end + for (k, v) in params + fd[string("features/params/", k)] = v + end end - close(fd) end ## JLD version of the same @@ -39,11 +39,11 @@ end ## the reverse encoding of Julia types. function retype(d::Dict) r = Dict() - for (k,v) in d - if (m=match(r"^(.*):Bool$", k)) != nothing - r[m.captures[1]] = v>0 - elseif (m=match(r"(.*):Symbol", k)) != nothing - r[m.captures[1]] = symbol(v) + for (k, v) in d + if (m=match(r"^(.*):Bool$", k)) !== nothing + r[m.captures[1]] = v > 0 + elseif (m=match(r"(.*):Symbol", k)) !== nothing + r[m.captures[1]] = Symbol(v) else r[k] = v end @@ -53,7 +53,7 @@ end ## Try to handle missing elements in the hdf5 file more gacefully function h5check(obj, name, content) - content ∈ HDF5.keys(obj) || error('"', name, '"', " does not contain ", '"', content, '"') + HDF5.haskey(obj, content) || error('"', name, '"', " does not contain ", '"', content, '"') obj[content] end @@ -62,11 +62,11 @@ function feaload(file::AbstractString; meta=false, params=false) HDF5.h5open(file, "r") do fd f = h5check(fd, file, "features") fea = map(Float64, read(h5check(f, "features", "data"))) - if length(fea)==0 # "no data" + if isempty(fea) # "no data" m = read(h5check(f, "features", "meta")) - fea = zeros(0,m["nfea"]) + fea = zeros(0, m["nfea"]) end - if ! (meta || params) + if !(meta || params) return fea end res = Any[fea] diff --git a/src/mfccs.jl b/src/mfccs.jl index 9878a15..c07c7ae 100644 --- a/src/mfccs.jl +++ b/src/mfccs.jl @@ -12,14 +12,15 @@ using DSP ## Recoded from rastamat's "melfcc.m" (c) Dan Ellis. ## Defaults here are HTK parameters, this is contrary to melfcc function mfcc(x::Vector{T}, sr::Real=16000.0; wintime=0.025, steptime=0.01, numcep=13, - lifterexp=-22, sumpower=false, preemph=0.97, dither=false, minfreq=0.0, maxfreq=sr/2, - nbands=20, bwidth=1.0, dcttype=3, fbtype=:htkmel, - usecmp=false, modelorder=0) where {T<:AbstractFloat} - if (preemph != 0) + lifterexp=-22, preemph=0.97, minfreq=0.0, maxfreq=sr/2, nbands=20, + bwidth=1.0, dcttype=3, fbtype=:htkmel, modelorder=0, sumpower::Bool=false, + dither::Bool=false, usecmp::Bool=false) where {T<:AbstractFloat} + if !iszero(preemph) x = filt(PolynomialRatio([1., -preemph], [1.]), x) end - pspec = powspec(x, sr, wintime=wintime, steptime=steptime, dither=dither) - aspec = audspec(pspec, sr, nfilts=nbands, fbtype=fbtype, minfreq=minfreq, maxfreq=maxfreq, sumpower=sumpower, bwidth=bwidth) + pspec = powspec(x, sr; wintime=wintime, steptime=steptime, dither=dither) + aspec = audspec(pspec, sr; nfilts=nbands, fbtype=fbtype, minfreq=minfreq, + maxfreq=maxfreq, sumpower=sumpower, bwidth=bwidth) if usecmp # PLP-like weighting/compression aspec = postaud(aspec, maxfreq, fbtype) @@ -44,12 +45,13 @@ function mfcc(x::Vector{T}, sr::Real=16000.0; wintime=0.025, steptime=0.01, numc return (cepstra, pspec', meta) end -mfcc(x::Array{T}, sr::Real=16000.0; args...) where {T<:AbstractFloat} = @distributed (tuple) for i=1:size(x)[2] mfcc(x[:,i], sr; args...) end +mfcc(x::AbstractVector{<:AbstractFloat}, sr::Real=16000.0; args...) = mfcc(Vector(x), sr; args...) +mfcc(x::AbstractMatrix{<:AbstractFloat}, sr::Real=16000.0; args...) = @distributed (tuple) for i in axes(x, 2) mfcc(x[:, i], sr; args...) end ## default feature configurations, :rasta, :htk, :spkid_toolkit, :wbspeaker ## With optional extra agrs... you can specify more options -function mfcc(x::Vector{T}, sr::AbstractFloat, defaults::Symbol; args...) where {T<:AbstractFloat} +function mfcc(x::AbstractVector{<:AbstractFloat}, sr::AbstractFloat, defaults::Symbol; args...) if defaults == :rasta mfcc(x, sr; lifterexp=0.6, sumpower=true, nbands=40, dcttype=2, fbtype=:mel, args...) elseif defaults ∈ [:spkid_toolkit, :nbspeaker] @@ -64,48 +66,45 @@ function mfcc(x::Vector{T}, sr::AbstractFloat, defaults::Symbol; args...) where end ## our features run down with time, this is essential for the use of DSP.filt() -function deltas(x::Matrix{T}, w::Int=9) where {T<:AbstractFloat} - (nr, nc) = size(x) - if nr == 0 || w <= 1 +function deltas(x::AbstractMatrix{T}, w::Int=9) where {T<:AbstractFloat} + nr, nc = size(x) + if iszero(nr) || w <= 1 return x end hlen = floor(Int, w/2) w = 2hlen + 1 # make w odd win = collect(convert(T, hlen):-1:-hlen) - x1 = reshape(x[1, :], 1, size(x, 2)) ## julia v0.4 v0.5 changeover - xend = reshape(x[end,:], 1, size(x, 2)) - xx = vcat(repeat(x1, hlen, 1), x, repeat(xend, hlen, 1)) ## take care of boundaries + x1 = x[1:1, :] + xend = x[end:end,:] + xx = vcat(repeat(x1, hlen), x, repeat(xend, hlen)) ## take care of boundaries norm = 3 / (hlen * w * (hlen + 1)) - return norm * filt(PolynomialRatio(win, [1.]), xx)[2hlen .+ (1:nr), :] + delta_v = filt(PolynomialRatio(win, [1.]), xx)[2hlen .+ (1:nr), :] + @. delta_v *= norm + return delta_v end +sortperm_along(a::AbstractArray, dim::Int) = (v=similar(a, Int, size(a, dim)); mapslices(x->sortperm!(v, x), a; dims=dim)) -import Base.Sort.sortperm -sortperm(a::Array, dim::Int) = mapslices(sortperm, a, dims=dim) - -erfinvtabs = Dict{Int,Vector{Float64}}() +erfinvtabs = Dict{Int, Vector{Float64}}() function erfinvtab(wl::Int) global erfinvtabs - if wl ∉ keys(erfinvtabs) - erfinvtabs[wl] = √2 * erfinv.(2collect(1:wl) / (wl + 1) .- 1) + if !haskey(erfinvtabs, wl) + erfinvtabs[wl] = @. √2 * erfinv(2(1:wl) / (wl + 1) - 1) end return erfinvtabs[wl] end -function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real} - nx, nfea = size(x) +function warpstats(x::AbstractMatrix{<:Real}, w::Int=399) + nx = size(x, 1) wl = min(w, nx) - hw = (wl+1) / 2 - rank = similar(x, Int) if nx < w - index = sortperm(x, 1) - for j in 1:nfea - rank[index[:,j], j] = collect(1:nx) - end + rank = sortperm_along(x, 1) else - for j in 1:nfea # operations in outer loop over columns, better for memory cache + rank = similar(x, Int) + hw = round(Int, (wl+1) / 2) + for j in axes(x, 2) # operations in outer loop over columns, better for memory cache for i in 1:nx - s = max(1, i - round(Int, hw) + 1) + s = max(1, i - hw + 1) e = s + w - 1 if e > nx d = e - nx @@ -114,7 +113,9 @@ function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real} end r = 1 for k in s:e - r += x[i, j] > x[k, j] + if x[i, j] > x[k, j] + r += 1 + end end rank[i, j] = r end @@ -123,20 +124,25 @@ function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real} return rank, erfinvtab(wl) end -function warp(x::Matrix{T}, w::Int=399) where {T<:Real} +function warp(x::AbstractMatrix{<:Real}, w::Int=399) rank, erfinvtab = warpstats(x, w) return erfinvtab[rank] end -warp(x::Vector{T}, w::Int=399) where {T<:Real} = warp(x'', w) +warp(x::AbstractVector{<:Real}, w::Int=399) = warp(reshape(x, :, 1), w) -function WarpedArray(x::Matrix, w::Int) +function WarpedArray(x::AbstractMatrix{<:Real}, w::Int) rank, erfinvtab = warpstats(x, w) WarpedArray(rank, erfinvtab) end ## mean and variance normalization -znorm(x::Array, dim::Int=1) = znorm!(copy(x), dim) -znorm!(x::Array, dim::Int=1) = broadcast!(/, x, broadcast!(-, x, x, mean(x, dims=dim)), std(x, dims=dim)) +znorm(x::AbstractArray, dim::Int=1) = znorm!(copy(x), dim) +function znorm!(x::AbstractArray, dim::Int=1) + mean_x = mean(x; dims=dim) + std_x = std(x; dims=dim) + @. x = (x - mean_x) / std_x + x +end ## short-term mean and variance normalization function stmvn(x::Vector, w::Int=399) @@ -170,15 +176,15 @@ end stmvn(m::Matrix, w::Int=399, dim::Int=1) = mapslices(x->stmvn(x, w), m, dims=dim) ## Shifted Delta Coefficients by copying -function sdc(x::Matrix{T}, n::Int=7, d::Int=1, p::Int=3, k::Int=7) where {T<:AbstractFloat} +function sdc(x::AbstractMatrix{T}, n::Int=7, d::Int=1, p::Int=3, k::Int=7) where {T<:AbstractFloat} nx, nfea = size(x) n ≤ nfea || error("Not enough features for n") hnfill = (k-1) * p / 2 nfill1, nfill2 = floor(Int, hnfill), ceil(Int, hnfill) - xx = vcat(zeros(eltype(x), nfill1, n), deltas(x[:,1:n], 2d+1), zeros(eltype(x), nfill2, n)) - y = zeros(eltype(x), nx, n*k) + xx = vcat(zeros(T, nfill1, n), deltas(x[:,1:n], 2d+1), zeros(T, nfill2, n)) + y = zeros(T, nx, n*k) for (dt, offset) in zip(0:p:p*k-1, 0:n:n*k-1) - y[:, offset+(1:n)] = xx[dt+(1:nx), :] + y[:, offset.+(1:n)] = xx[dt.+(1:nx), :] end return y end diff --git a/src/rasta.jl b/src/rasta.jl index d9eb3c4..d83df62 100644 --- a/src/rasta.jl +++ b/src/rasta.jl @@ -12,11 +12,11 @@ using DSP using FFTW using LinearAlgebra -function powspec(x::Vector{T}, sr::Real=8000.0; wintime=0.025, steptime=0.01, dither=true) where {T<:AbstractFloat} +function powspec(x::AbstractVector{<:AbstractFloat}, sr::Real=8000.0; wintime=0.025, steptime=0.01, dither=true) nwin = round(Integer, wintime * sr) nstep = round(Integer, steptime * sr) - nfft = 2 .^ Integer((ceil(log2(nwin)))) + nfft = nextpow(2, nwin) window = hamming(nwin) # overrule default in specgram which is hanning in Octave noverlap = nwin - nstep @@ -29,8 +29,9 @@ function powspec(x::Vector{T}, sr::Real=8000.0; wintime=0.025, steptime=0.01, di end # audspec tested against octave with simple vectors for all fbtypes -function audspec(x::Matrix{T}, sr::Real=16000.0; nfilts=ceil(Int, hz2bark(sr/2)), fbtype=:bark, - minfreq=0., maxfreq=sr/2, sumpower=true, bwidth=1.0) where {T<:AbstractFloat} +function audspec(x::AbstractMatrix{<:AbstractFloat}, sr::Real=16000.0; + nfilts=ceil(Int, hz2bark(sr / 2)), fbtype=:bark, + minfreq=0., maxfreq=sr/2, sumpower::Bool=true, bwidth=1.0) nfreqs, nframes = size(x) nfft = 2(nfreqs-1) if fbtype == :bark @@ -75,40 +76,49 @@ end hz2bark(f) = 6 * asinh(f / 600) bark2hz(bark) = 600 * sinh(bark / 6) -function fft2melmx(nfft::Int, nfilts::Int; sr=8000.0, width=1.0, minfreq=0.0, maxfreq=sr/2, htkmel=false, constamp=false) +function slope_gen(fs, fftfreq) + f1, f2, f3 = fs + # lower and upper slopes for all bins + loslope = (fftfreq - f1) / (f2 - f1) + hislope = (f3 - fftfreq) / (f3 - f2) + # then intersect them with each other and zero + max(0, min(loslope, hislope)) +end + +function fft2melmx(nfft::Int, nfilts::Int; sr=8000.0, width=1.0, minfreq=0.0, + maxfreq=sr/2, htkmel::Bool=false, constamp::Bool=false) wts = zeros(nfilts, nfft) + lastind = (nfft>>1) # Center freqs of each DFT bin - fftfreqs = collect(0:nfft-1) / nfft * sr; + fftfreqs = collect(0:lastind-1) / nfft * sr; # 'Center freqs' of mel bands - uniformly spaced between limits minmel = hz2mel(minfreq, htkmel); maxmel = hz2mel(maxfreq, htkmel); - binfreqs = mel2hz(minmel .+ collect(0:(nfilts+1)) / (nfilts + 1) * (maxmel - minmel), htkmel); + binfreqs = @. mel2hz(minmel + (0:(nfilts+1)) / (nfilts + 1) * (maxmel - minmel), htkmel); ## binbin = iround(binfrqs/sr*(nfft-1)); for i in 1:nfilts - fs = binfreqs[i .+ (0:2)] + fs = binfreqs[i], binfreqs[i+1], binfreqs[i+2] # scale by width - fs = fs[2] .+ (fs .- fs[2])width - # lower and upper slopes for all bins - loslope = (fftfreqs .- fs[1]) / (fs[2] - fs[1]) - hislope = (fs[3] .- fftfreqs) / (fs[3] - fs[2]) - # then intersect them with each other and zero - wts[i,:] = max.(0, min.(loslope, hislope)) + fs = @. fs[2] + (fs - fs[2])width + for j in eachindex(fftfreqs) + wts[i, j] = slope_gen(fs, fftfreqs[j]) + end end if !constamp ## unclear what this does... ## Slaney-style mel is scaled to be approx constant E per channel - wts = broadcast(*, 2 ./ ((binfreqs[3:nfilts+2]) - binfreqs[1:nfilts]), wts) + @. wts = 2 / ((binfreqs[3:nfilts+2]) - binfreqs[1:nfilts]) * wts + # Make sure 2nd half of DFT is zero + wts[:, lastind+1:nfft] .= 0. end - # Make sure 2nd half of DFT is zero - wts[:, (nfft>>1)+1:nfft] .= 0. return wts end -function hz2mel(f::Vector{T}, htk=false) where {T<:AbstractFloat} +function hz2mel(f::AbstractVector{<:AbstractFloat}, htk::Bool=false) if htk - return 2595 .* log10.(1 .+ f / 700) + return @. 2595 * log10(1 + f / 700) else f0 = 0.0 fsp = 200 / 3 @@ -116,40 +126,38 @@ function hz2mel(f::Vector{T}, htk=false) where {T<:AbstractFloat} brkpt = (brkfrq - f0) / fsp logstep = exp(log(6.4) / 27) linpts = f .< brkfrq - z = zeros(size(f)) # prevent InexactError() by making these Float64 - z[findall(linpts)] = f[findall(linpts)]/brkfrq ./ log(logstep) - z[findall(.!linpts)] = brkpt .+ log.(f[findall(.!linpts)] / brkfrq) ./ log(logstep) + z = zeros(axes(f)) # prevent InexactError() by making these Float64 + @. z[linpts] = f[linpts] / brkfrq / log(logstep) + @. z[!linpts] = brkpt + log(f[!linpts] / brkfrq) / log(logstep) end return z end -hz2mel(f::AbstractFloat, htk=false) = hz2mel([f], htk)[1] +hz2mel(f::AbstractFloat, htk::Bool=false) = hz2mel([f], htk)[1] -function mel2hz(z::Vector{T}, htk=false) where {T<:AbstractFloat} +function mel2hz(z::AbstractFloat, htk::Bool=false) if htk - f = 700 .* (10 .^ (z ./ 2595) .- 1) + f = @. 700 * (10 ^ (z / 2595) - 1) else f0 = 0.0 fsp = 200 / 3 brkfrq = 1000.0 brkpt = (brkfrq - f0) / fsp logstep = exp(log(6.4) / 27) - linpts = z .< brkpt - f = similar(z) - f[linpts] = f0 .+ fsp * z[linpts] - f[.!linpts] = brkfrq .* exp.(log.(logstep) * (z[.!linpts] .- brkpt)) + linpt = z < brkpt + f = linpt ? f0 + fsp * z : brkfrq * exp(log(logstep) * (z - brkpt)) end return f end function postaud(x::Matrix{T}, fmax::Real, fbtype=:bark, broaden=false) where {T<:AbstractFloat} - (nbands,nframes) = size(x) + nbands, nframes = size(x) nfpts = nbands + 2broaden if fbtype == :bark bandcfhz = bark2hz(range(0, stop=hz2bark(fmax), length=nfpts)) elseif fbtype == :mel - bandcfhz = mel2hz(range(0, stop=hz2mel(fmax), length=nfpts)) + bandcfhz = mel2hz.(range(0, stop=hz2mel(fmax), length=nfpts)) elseif fbtype == :htkmel || fbtype == :fcmel - bandcfhz = mel2hz(range(0, stop=hz2mel(fmax,1), length=nfpts),1); + bandcfhz = mel2hz.(range(0, stop=hz2mel(fmax, true), length=nfpts),1); else error("Unknown filterbank type") end @@ -158,11 +166,11 @@ function postaud(x::Matrix{T}, fmax::Real, fbtype=:bark, broaden=false) where {T # Hynek's magic equal-loudness-curve formula fsq = bandcfhz.^2 ftmp = fsq + 1.6e5 - eql = ((fsq ./ ftmp).^2) .* ((fsq + 1.44e6) ./ (fsq + 9.61e6)) + eql = @. ((fsq / ftmp)^2) * ((fsq + 1.44e6) / (fsq + 9.61e6)) # weight the critical bands - z = broadcast(*, eql, x) + z = eql .* x # cube root compress - z .^= 0.33 + @. z = cbrt(z) # replicate first and last band (because they are unreliable as calculated) if broaden z = z[[1, 1:nbands, nbands], :]; @@ -203,55 +211,57 @@ function lpc2cep(a::Array{T}, ncep::Int=0) where {T<:AbstractFloat} return c end -function spec2cep(spec::Array{T}, ncep::Int=13, dcttype::Int=2) where {T<:AbstractFloat} +function spec2cep(spec::AbstractMatrix{<:AbstractFloat}, ncep::Int=13, dcttype::Int=2) # no discrete cosine transform option - dcttype == -1 && return log(spec) + dcttype == -1 && return log.(spec) nr, nc = size(spec) dctm = similar(spec, ncep, nr) if 1 < dcttype < 4 # type 2,3 - for i in 1:ncep - dctm[i, :] = cos.((i - 1) * collect(1:2:2nr-1)π / (2nr)) * √(2/nr) + for j in 1:nr, i in 1:ncep + dctm[i, j] = cospi((i - 1) * (2j-1) / (2nr)) * √(2/nr) end if dcttype == 2 - dctm[1, :] ./= √2 + @. dctm[1, :] /= √2 + end + elseif dcttype == 4 # type 4 + for j in 1:nr, i in 1:ncep + dctm[i, j] = 2cospi((i-1) * j/(nr+1)) end - elseif dcttype == 4 # type 4 - for i in 1:ncep - dctm[i, :] = 2cos.((i-1) * collect(1:nr)π/(nr+1)) - dctm[i, 1] += 1 - dctm[i, nr] += (-1)^(i-1) + for i in axes(dctm, 1) + dctm[i, end] += (-1)^(i-1) end - dctm /= 2(nr + 1) + @. dctm[:, 1] += 1 + @. dctm /= 2(nr + 1) else # type 1 - for i in 1:ncep - dctm[i, :] = cos.((i-1) * collect(0:nr-1)π ./ (nr - 1)) ./ (nr - 1) + for j in 1:nr, i in 1:ncep + dctm[i, j] = cospi((i-1) * (j-1) / (nr - 1)) / (nr - 1) end - dctm[:, [1, nr]] /= 2 + @. dctm[:, [1, nr]] /= 2 end - return dctm * log.(spec) + # assume spec is not reused + return dctm * map!(log, spec, spec) end -function lifter(x::Array{T}, lift::Real=0.6, invs=false) where {T<:AbstractFloat} - (ncep, nf) = size(x) - if lift == 0 +function lifter(x::AbstractArray{<:AbstractFloat}, lift::Real=0.6, invs=false) + ncep, nf = size(x) + if iszero(lift) return x - end - if lift > 0 + elseif lift > 0 if lift > 10 - error("Too high lift number") + error("Lift number is too high (>10)") end - liftw = [1; collect(1:ncep-1).^lift] + liftw = pushfirst!((1:ncep-1).^lift, 1) else # Hack to support HTK liftering if !isa(lift, Integer) error("Negative lift must be interger") end lift = -lift # strictly speaking unnecessary... - liftw = vcat(1, (1 .+ lift/2 * sin.(collect(1:ncep-1)π / lift))) + liftw = @. 1 + lift / 2 * sinpi((0:ncep-1) / lift) end if invs - liftw = 1 ./ liftw + @. liftw = inv(liftw) end y = broadcast(*, x, liftw) return y @@ -292,17 +302,17 @@ function levinson(acf::Vector{T}, p::Int) where {T<:Real} end pushfirst!(a, 1) end - return (a, v) + return a, v end function levinson(acf::Array{T}, p::Int) where {T<:Real} - (nr,nc) = size(acf) + nr, nc = size(acf) a = zeros(p + 1, nc) v = zeros(p + 1, nc) for i in 1:nc a[:,i], v[:,i] = levinson(acf[:,i], p) end - return (a, v) + return a, v end ## Freely after octave's implementation, ver 3.2.4, by jwe && jh @@ -310,8 +320,8 @@ end function toeplitz(c::Vector{T}, r::Vector{T}=c) where {T<:Real} nc = length(r) nr = length(c) - res = zeros(typeof(c[1]), nr, nc) - if nc == 0 || nr == 0 + res = zeros(T, nr, nc) + if iszero(nc) || iszero(nr) return res end if r[1] != c[1] @@ -319,7 +329,6 @@ function toeplitz(c::Vector{T}, r::Vector{T}=c) where {T<:Real} end if typeof(c) <: Complex conj!(c) - c[1] = conj(c[1]) # bug in julia? end ## if issparse(c) && ispsparse(r) data = [r[end:-1:2], c]