diff --git a/src/mfccs.jl b/src/mfccs.jl
index 9878a15..c07c7ae 100644
--- a/src/mfccs.jl
+++ b/src/mfccs.jl
@@ -12,14 +12,15 @@ using DSP
 ## Recoded from rastamat's "melfcc.m" (c) Dan Ellis.
 ## Defaults here are HTK parameters, this is contrary to melfcc
 function mfcc(x::Vector{T}, sr::Real=16000.0; wintime=0.025, steptime=0.01, numcep=13,
-              lifterexp=-22, sumpower=false, preemph=0.97, dither=false, minfreq=0.0, maxfreq=sr/2,
-              nbands=20, bwidth=1.0, dcttype=3, fbtype=:htkmel,
-              usecmp=false, modelorder=0) where {T<:AbstractFloat}
-    if (preemph != 0)
+              lifterexp=-22, preemph=0.97, minfreq=0.0, maxfreq=sr/2, nbands=20,
+              bwidth=1.0, dcttype=3, fbtype=:htkmel, modelorder=0, sumpower::Bool=false,
+              dither::Bool=false, usecmp::Bool=false) where {T<:AbstractFloat}
+    if !iszero(preemph)
         x = filt(PolynomialRatio([1., -preemph], [1.]), x)
     end
-    pspec = powspec(x, sr, wintime=wintime, steptime=steptime, dither=dither)
-    aspec = audspec(pspec, sr, nfilts=nbands, fbtype=fbtype, minfreq=minfreq, maxfreq=maxfreq, sumpower=sumpower, bwidth=bwidth)
+    pspec = powspec(x, sr; wintime=wintime, steptime=steptime, dither=dither)
+    aspec = audspec(pspec, sr; nfilts=nbands, fbtype=fbtype, minfreq=minfreq,
+                    maxfreq=maxfreq, sumpower=sumpower, bwidth=bwidth)
     if usecmp
         #  PLP-like weighting/compression
         aspec = postaud(aspec, maxfreq, fbtype)
@@ -44,12 +45,13 @@ function mfcc(x::Vector{T}, sr::Real=16000.0; wintime=0.025, steptime=0.01, numc
     return (cepstra, pspec', meta)
 end
 
-mfcc(x::Array{T}, sr::Real=16000.0; args...) where {T<:AbstractFloat} = @distributed (tuple) for i=1:size(x)[2] mfcc(x[:,i], sr; args...) end
+mfcc(x::AbstractVector{<:AbstractFloat}, sr::Real=16000.0; args...) = mfcc(Vector(x), sr; args...)
+mfcc(x::AbstractMatrix{<:AbstractFloat}, sr::Real=16000.0; args...) = @distributed (tuple) for i in axes(x, 2) mfcc(x[:, i], sr; args...) end
 
 
 ## default feature configurations, :rasta, :htk, :spkid_toolkit, :wbspeaker
 ## With optional extra agrs... you can specify more options
-function mfcc(x::Vector{T}, sr::AbstractFloat, defaults::Symbol; args...) where {T<:AbstractFloat}
+function mfcc(x::AbstractVector{<:AbstractFloat}, sr::AbstractFloat, defaults::Symbol; args...)
     if defaults == :rasta
         mfcc(x, sr; lifterexp=0.6, sumpower=true, nbands=40, dcttype=2, fbtype=:mel, args...)
     elseif defaults ∈ [:spkid_toolkit, :nbspeaker]
@@ -64,48 +66,45 @@ function mfcc(x::Vector{T}, sr::AbstractFloat, defaults::Symbol; args...) where
 end
 
 ## our features run down with time, this is essential for the use of DSP.filt()
-function deltas(x::Matrix{T}, w::Int=9) where {T<:AbstractFloat}
-    (nr, nc) = size(x)
-    if nr == 0 || w <= 1
+function deltas(x::AbstractMatrix{T}, w::Int=9) where {T<:AbstractFloat}
+    nr, nc = size(x)
+    if iszero(nr) || w <= 1
         return x
     end
     hlen = floor(Int, w/2)
     w = 2hlen + 1                 # make w odd
     win = collect(convert(T, hlen):-1:-hlen)
-    x1 = reshape(x[1, :], 1, size(x, 2)) ## julia v0.4 v0.5 changeover
-    xend = reshape(x[end,:], 1, size(x, 2))
-    xx = vcat(repeat(x1, hlen, 1), x, repeat(xend, hlen, 1)) ## take care of boundaries
+    x1 = x[1:1, :]
+    xend = x[end:end,:]
+    xx = vcat(repeat(x1, hlen), x, repeat(xend, hlen)) ## take care of boundaries
     norm = 3 / (hlen * w * (hlen + 1))
-    return norm * filt(PolynomialRatio(win, [1.]), xx)[2hlen .+ (1:nr), :]
+    delta_v = filt(PolynomialRatio(win, [1.]), xx)[2hlen .+ (1:nr), :]
+    @. delta_v *= norm
+    return delta_v
 end
 
+sortperm_along(a::AbstractArray, dim::Int) = (v=similar(a, Int, size(a, dim)); mapslices(x->sortperm!(v, x), a; dims=dim))
 
-import Base.Sort.sortperm
-sortperm(a::Array, dim::Int) = mapslices(sortperm, a, dims=dim)
-
-erfinvtabs = Dict{Int,Vector{Float64}}()
+erfinvtabs = Dict{Int, Vector{Float64}}()
 function erfinvtab(wl::Int)
     global erfinvtabs
-    if wl ∉ keys(erfinvtabs)
-        erfinvtabs[wl] = √2 * erfinv.(2collect(1:wl) / (wl + 1) .- 1)
+    if !haskey(erfinvtabs, wl)
+        erfinvtabs[wl] = @. √2 * erfinv(2(1:wl) / (wl + 1) - 1)
     end
     return erfinvtabs[wl]
 end
 
-function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real}
-    nx, nfea = size(x)
+function warpstats(x::AbstractMatrix{<:Real}, w::Int=399)
+    nx = size(x, 1)
     wl = min(w, nx)
-    hw = (wl+1) / 2
-    rank = similar(x, Int)
     if nx < w
-        index = sortperm(x, 1)
-        for j in 1:nfea
-            rank[index[:,j], j] = collect(1:nx)
-        end
+        rank = sortperm_along(x, 1)
     else
-        for j in 1:nfea            # operations in outer loop over columns, better for memory cache
+        rank = similar(x, Int)
+        hw = round(Int, (wl+1) / 2)
+        for j in axes(x, 2)            # operations in outer loop over columns, better for memory cache
             for i in 1:nx
-                s = max(1, i - round(Int, hw) + 1)
+                s = max(1, i - hw + 1)
                 e = s + w - 1
                 if e > nx
                     d = e - nx
@@ -114,7 +113,9 @@ function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real}
                 end
                 r = 1
                 for k in s:e
-                    r += x[i, j] > x[k, j]
+                    if x[i, j] > x[k, j]
+                        r += 1
+                    end
                 end
                 rank[i, j] = r
             end
@@ -123,20 +124,25 @@ function warpstats(x::Matrix{T}, w::Int=399) where {T<:Real}
     return rank, erfinvtab(wl)
 end
 
-function warp(x::Matrix{T}, w::Int=399) where {T<:Real}
+function warp(x::AbstractMatrix{<:Real}, w::Int=399)
     rank, erfinvtab = warpstats(x, w)
     return erfinvtab[rank]
 end
-warp(x::Vector{T}, w::Int=399) where {T<:Real} = warp(x'', w)
+warp(x::AbstractVector{<:Real}, w::Int=399) = warp(reshape(x, :, 1), w)
 
-function WarpedArray(x::Matrix, w::Int)
+function WarpedArray(x::AbstractMatrix{<:Real}, w::Int)
     rank, erfinvtab = warpstats(x, w)
     WarpedArray(rank, erfinvtab)
 end
 
 ## mean and variance normalization
-znorm(x::Array, dim::Int=1) = znorm!(copy(x), dim)
-znorm!(x::Array, dim::Int=1) = broadcast!(/, x, broadcast!(-, x, x, mean(x, dims=dim)), std(x, dims=dim))
+znorm(x::AbstractArray, dim::Int=1) = znorm!(copy(x), dim)
+function znorm!(x::AbstractArray, dim::Int=1)
+    mean_x = mean(x; dims=dim)
+    std_x = std(x; dims=dim)
+    @. x = (x - mean_x) / std_x
+    x
+end
 
 ## short-term mean and variance normalization
 function stmvn(x::Vector, w::Int=399)
@@ -170,15 +176,15 @@ end
 stmvn(m::Matrix, w::Int=399, dim::Int=1) = mapslices(x->stmvn(x, w), m, dims=dim)
 
 ## Shifted Delta Coefficients by copying
-function sdc(x::Matrix{T}, n::Int=7, d::Int=1, p::Int=3, k::Int=7) where {T<:AbstractFloat}
+function sdc(x::AbstractMatrix{T}, n::Int=7, d::Int=1, p::Int=3, k::Int=7) where {T<:AbstractFloat}
     nx, nfea = size(x)
     n ≤ nfea || error("Not enough features for n")
     hnfill = (k-1) * p / 2
     nfill1, nfill2 = floor(Int, hnfill), ceil(Int, hnfill)
-    xx = vcat(zeros(eltype(x), nfill1, n), deltas(x[:,1:n], 2d+1), zeros(eltype(x), nfill2, n))
-    y = zeros(eltype(x), nx, n*k)
+    xx = vcat(zeros(T, nfill1, n), deltas(x[:,1:n], 2d+1), zeros(T, nfill2, n))
+    y = zeros(T, nx, n*k)
     for (dt, offset) in zip(0:p:p*k-1, 0:n:n*k-1)
-        y[:, offset+(1:n)] = xx[dt+(1:nx), :]
+        y[:, offset.+(1:n)] = xx[dt.+(1:nx), :]
     end
     return y
 end
diff --git a/src/rasta.jl b/src/rasta.jl
index d9eb3c4..d83df62 100644
--- a/src/rasta.jl
+++ b/src/rasta.jl
@@ -12,11 +12,11 @@ using DSP
 using FFTW
 using LinearAlgebra
 
-function powspec(x::Vector{T}, sr::Real=8000.0; wintime=0.025, steptime=0.01, dither=true) where {T<:AbstractFloat}
+function powspec(x::AbstractVector{<:AbstractFloat}, sr::Real=8000.0; wintime=0.025, steptime=0.01, dither=true)
     nwin = round(Integer, wintime * sr)
     nstep = round(Integer, steptime * sr)
 
-    nfft = 2 .^ Integer((ceil(log2(nwin))))
+    nfft = nextpow(2, nwin)
     window = hamming(nwin)      # overrule default in specgram which is hanning in Octave
     noverlap = nwin - nstep
 
@@ -29,8 +29,9 @@ function powspec(x::Vector{T}, sr::Real=8000.0; wintime=0.025, steptime=0.01, di
 end
 
 # audspec tested against octave with simple vectors for all fbtypes
-function audspec(x::Matrix{T}, sr::Real=16000.0; nfilts=ceil(Int, hz2bark(sr/2)), fbtype=:bark,
-                 minfreq=0., maxfreq=sr/2, sumpower=true, bwidth=1.0) where {T<:AbstractFloat}
+function audspec(x::AbstractMatrix{<:AbstractFloat}, sr::Real=16000.0;
+                 nfilts=ceil(Int, hz2bark(sr / 2)), fbtype=:bark,
+                 minfreq=0., maxfreq=sr/2, sumpower::Bool=true, bwidth=1.0)
     nfreqs, nframes = size(x)
     nfft = 2(nfreqs-1)
     if fbtype == :bark
@@ -75,40 +76,49 @@ end
 hz2bark(f) = 6 * asinh(f / 600)
 bark2hz(bark) = 600 * sinh(bark / 6)
 
-function fft2melmx(nfft::Int, nfilts::Int; sr=8000.0, width=1.0, minfreq=0.0, maxfreq=sr/2, htkmel=false, constamp=false)
+function slope_gen(fs, fftfreq)
+    f1, f2, f3 = fs
+    # lower and upper slopes for all bins
+    loslope = (fftfreq - f1) / (f2 - f1)
+    hislope = (f3 - fftfreq) / (f3 - f2)
+    # then intersect them with each other and zero
+    max(0, min(loslope, hislope))
+end
+
+function fft2melmx(nfft::Int, nfilts::Int; sr=8000.0, width=1.0, minfreq=0.0,
+                   maxfreq=sr/2, htkmel::Bool=false, constamp::Bool=false)
     wts = zeros(nfilts, nfft)
+    lastind = (nfft>>1)
     # Center freqs of each DFT bin
-    fftfreqs = collect(0:nfft-1) / nfft * sr;
+    fftfreqs = collect(0:lastind-1) / nfft * sr;
     # 'Center freqs' of mel bands - uniformly spaced between limits
     minmel = hz2mel(minfreq, htkmel);
     maxmel = hz2mel(maxfreq, htkmel);
-    binfreqs = mel2hz(minmel .+ collect(0:(nfilts+1)) / (nfilts + 1) * (maxmel - minmel), htkmel);
+    binfreqs = @. mel2hz(minmel + (0:(nfilts+1)) / (nfilts + 1) * (maxmel - minmel), htkmel);
 ##    binbin = iround(binfrqs/sr*(nfft-1));
 
     for i in 1:nfilts
-        fs = binfreqs[i .+ (0:2)]
+        fs = binfreqs[i], binfreqs[i+1], binfreqs[i+2]
         # scale by width
-        fs = fs[2] .+ (fs .- fs[2])width
-        # lower and upper slopes for all bins
-        loslope = (fftfreqs .- fs[1]) / (fs[2] - fs[1])
-        hislope = (fs[3] .- fftfreqs) / (fs[3] - fs[2])
-        # then intersect them with each other and zero
-        wts[i,:] = max.(0, min.(loslope, hislope))
+        fs = @. fs[2] + (fs - fs[2])width
+        for j in eachindex(fftfreqs)
+            wts[i, j] = slope_gen(fs, fftfreqs[j])
+        end
     end
 
     if !constamp
         ## unclear what this does...
         ## Slaney-style mel is scaled to be approx constant E per channel
-        wts = broadcast(*, 2 ./ ((binfreqs[3:nfilts+2]) - binfreqs[1:nfilts]), wts)
+        @. wts = 2 / ((binfreqs[3:nfilts+2]) - binfreqs[1:nfilts]) * wts
+        # Make sure 2nd half of DFT is zero
+        wts[:, lastind+1:nfft] .= 0.
     end
-    # Make sure 2nd half of DFT is zero
-    wts[:, (nfft>>1)+1:nfft] .= 0.
     return wts
 end
 
-function hz2mel(f::Vector{T}, htk=false) where {T<:AbstractFloat}
+function hz2mel(f::AbstractVector{<:AbstractFloat}, htk::Bool=false)
     if htk
-        return 2595 .* log10.(1 .+ f / 700)
+        return @. 2595 * log10(1 + f / 700)
     else
         f0 = 0.0
         fsp = 200 / 3
@@ -116,40 +126,38 @@ function hz2mel(f::Vector{T}, htk=false) where {T<:AbstractFloat}
         brkpt = (brkfrq - f0) / fsp
         logstep = exp(log(6.4) / 27)
         linpts = f .< brkfrq
-        z = zeros(size(f))      # prevent InexactError() by making these Float64
-        z[findall(linpts)] = f[findall(linpts)]/brkfrq ./ log(logstep)
-        z[findall(.!linpts)] = brkpt .+ log.(f[findall(.!linpts)] / brkfrq) ./ log(logstep)
+        z = zeros(axes(f))      # prevent InexactError() by making these Float64
+        @. z[linpts] = f[linpts] / brkfrq / log(logstep)
+        @. z[!linpts] = brkpt + log(f[!linpts] / brkfrq) / log(logstep)
     end
     return z
 end
-hz2mel(f::AbstractFloat, htk=false)  = hz2mel([f], htk)[1]
+hz2mel(f::AbstractFloat, htk::Bool=false)  = hz2mel([f], htk)[1]
 
-function mel2hz(z::Vector{T}, htk=false) where {T<:AbstractFloat}
+function mel2hz(z::AbstractFloat, htk::Bool=false)
     if htk
-        f = 700 .* (10 .^ (z ./ 2595) .- 1)
+        f = @. 700 * (10 ^ (z / 2595) - 1)
     else
         f0 = 0.0
         fsp = 200 / 3
         brkfrq = 1000.0
         brkpt = (brkfrq - f0) / fsp
         logstep = exp(log(6.4) / 27)
-        linpts = z .< brkpt
-        f = similar(z)
-        f[linpts] = f0 .+ fsp * z[linpts]
-        f[.!linpts] = brkfrq .* exp.(log.(logstep) * (z[.!linpts] .- brkpt))
+        linpt = z < brkpt
+        f = linpt ? f0 + fsp * z : brkfrq * exp(log(logstep) * (z - brkpt))
     end
     return f
 end
 
 function postaud(x::Matrix{T}, fmax::Real, fbtype=:bark, broaden=false) where {T<:AbstractFloat}
-    (nbands,nframes) = size(x)
+    nbands, nframes = size(x)
     nfpts = nbands + 2broaden
     if fbtype == :bark
         bandcfhz = bark2hz(range(0, stop=hz2bark(fmax), length=nfpts))
     elseif fbtype == :mel
-        bandcfhz = mel2hz(range(0, stop=hz2mel(fmax), length=nfpts))
+        bandcfhz = mel2hz.(range(0, stop=hz2mel(fmax), length=nfpts))
     elseif fbtype == :htkmel || fbtype == :fcmel
-        bandcfhz = mel2hz(range(0, stop=hz2mel(fmax,1), length=nfpts),1);
+        bandcfhz = mel2hz.(range(0, stop=hz2mel(fmax, true), length=nfpts),1);
     else
         error("Unknown filterbank type")
     end
@@ -158,11 +166,11 @@ function postaud(x::Matrix{T}, fmax::Real, fbtype=:bark, broaden=false) where {T
     # Hynek's magic equal-loudness-curve formula
     fsq = bandcfhz.^2
     ftmp = fsq + 1.6e5
-    eql = ((fsq ./ ftmp).^2) .* ((fsq + 1.44e6) ./ (fsq + 9.61e6))
+    eql = @. ((fsq / ftmp)^2) * ((fsq + 1.44e6) / (fsq + 9.61e6))
     # weight the critical bands
-    z = broadcast(*, eql, x)
+    z = eql .* x
     # cube root compress
-    z .^= 0.33
+    @. z = cbrt(z)
     # replicate first and last band (because they are unreliable as calculated)
     if broaden
         z = z[[1, 1:nbands, nbands], :];
@@ -203,55 +211,57 @@ function lpc2cep(a::Array{T}, ncep::Int=0) where {T<:AbstractFloat}
     return c
 end
 
-function spec2cep(spec::Array{T}, ncep::Int=13, dcttype::Int=2) where {T<:AbstractFloat}
+function spec2cep(spec::AbstractMatrix{<:AbstractFloat}, ncep::Int=13, dcttype::Int=2)
     # no discrete cosine transform option
-    dcttype == -1 && return log(spec)
+    dcttype == -1 && return log.(spec)
 
     nr, nc = size(spec)
     dctm = similar(spec, ncep, nr)
     if 1 < dcttype < 4          # type 2,3
-        for i in 1:ncep
-            dctm[i, :] = cos.((i - 1) * collect(1:2:2nr-1)π / (2nr)) * √(2/nr)
+        for j in 1:nr, i in 1:ncep
+            dctm[i, j] = cospi((i - 1) * (2j-1) / (2nr)) * √(2/nr)
         end
         if dcttype == 2
-            dctm[1, :] ./= √2
+            @. dctm[1, :] /= √2
+        end
+    elseif dcttype == 4         # type 4
+        for j in 1:nr, i in 1:ncep
+            dctm[i, j] = 2cospi((i-1) * j/(nr+1))
         end
-    elseif dcttype == 4           # type 4
-        for i in 1:ncep
-            dctm[i, :] = 2cos.((i-1) * collect(1:nr)π/(nr+1))
-            dctm[i, 1] += 1
-            dctm[i, nr] += (-1)^(i-1)
+        for i in axes(dctm, 1)
+            dctm[i, end] += (-1)^(i-1)
         end
-        dctm /= 2(nr + 1)
+        @. dctm[:, 1] += 1
+        @. dctm /= 2(nr + 1)
     else                        # type 1
-        for i in 1:ncep
-            dctm[i, :] = cos.((i-1) * collect(0:nr-1)π ./ (nr - 1)) ./ (nr - 1)
+        for j in 1:nr, i in 1:ncep
+            dctm[i, j] = cospi((i-1) * (j-1) / (nr - 1)) / (nr - 1)
         end
-        dctm[:, [1, nr]] /= 2
+        @. dctm[:, [1, nr]] /= 2
     end
-    return dctm * log.(spec)
+    # assume spec is not reused
+    return dctm * map!(log, spec, spec)
 end
 
-function lifter(x::Array{T}, lift::Real=0.6, invs=false) where {T<:AbstractFloat}
-    (ncep, nf) = size(x)
-    if lift == 0
+function lifter(x::AbstractArray{<:AbstractFloat}, lift::Real=0.6, invs=false)
+    ncep, nf = size(x)
+    if iszero(lift)
         return x
-    end
-    if lift > 0
+    elseif lift > 0
         if lift > 10
-            error("Too high lift number")
+            error("Lift number is too high (>10)")
         end
-        liftw = [1; collect(1:ncep-1).^lift]
+        liftw = pushfirst!((1:ncep-1).^lift, 1)
     else
         # Hack to support HTK liftering
         if !isa(lift, Integer)
             error("Negative lift must be interger")
         end
         lift = -lift            # strictly speaking unnecessary...
-        liftw = vcat(1, (1 .+ lift/2 * sin.(collect(1:ncep-1)π / lift)))
+        liftw = @. 1 + lift / 2 * sinpi((0:ncep-1) / lift)
     end
     if invs
-        liftw = 1 ./ liftw
+        @. liftw = inv(liftw)
     end
     y = broadcast(*, x, liftw)
     return y
@@ -292,17 +302,17 @@ function levinson(acf::Vector{T}, p::Int) where {T<:Real}
         end
         pushfirst!(a, 1)
     end
-    return (a, v)
+    return a, v
 end
 
 function levinson(acf::Array{T}, p::Int) where {T<:Real}
-    (nr,nc) = size(acf)
+    nr, nc = size(acf)
     a = zeros(p + 1, nc)
     v = zeros(p + 1, nc)
     for i in 1:nc
         a[:,i], v[:,i] = levinson(acf[:,i], p)
     end
-    return (a, v)
+    return a, v
 end
 
 ## Freely after octave's implementation, ver 3.2.4, by jwe && jh
@@ -310,8 +320,8 @@ end
 function toeplitz(c::Vector{T}, r::Vector{T}=c) where {T<:Real}
     nc = length(r)
     nr = length(c)
-    res = zeros(typeof(c[1]), nr, nc)
-    if nc == 0 || nr == 0
+    res = zeros(T, nr, nc)
+    if iszero(nc) || iszero(nr)
         return res
     end
     if r[1] != c[1]
@@ -319,7 +329,6 @@ function toeplitz(c::Vector{T}, r::Vector{T}=c) where {T<:Real}
     end
     if typeof(c) <: Complex
         conj!(c)
-        c[1] = conj(c[1])       # bug in julia?
     end
     ## if issparse(c) && ispsparse(r)
     data = [r[end:-1:2], c]