diff --git a/NEWS.md b/NEWS.md
index 540ef72655..512f250676 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+# v0.9.0
+* [Depthwise convolutional layer API changes](https://github.com/FluxML/Flux.jl/pull/756) from `in => mult` channel specification to `in => out` channel specification, and deprecates implicit `out` constructor.
+
 # v0.8.0

 * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311).
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index a59a8c6a6a..3739fd1c34 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -136,18 +136,17 @@ end
 (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))

 """
-    DepthwiseConv(size, in)
-    DepthwiseConv(size, in=>mul)
-    DepthwiseConv(size, in=>mul, relu)
+    DepthwiseConv(size, in=>out)
+    DepthwiseConv(size, in=>out, relu)

 Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`.
-`in` and `mul` specify the number of input channels and channel multiplier respectively.
-In case the `mul` is not specified it is taken as 1.
+`in` and `out` specify the number of input and output channels respectively.
+Note that `out` must be an integer multiple of `in`.

 Data should be stored in WHCN order. In other words, a 100×100 RGB image would
 be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad` and `stride`.
+Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
 struct DepthwiseConv{N,M,F,A,V}
   σ::F
@@ -166,17 +165,18 @@ function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identit
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end

-DepthwiseConv(k::NTuple{N,Integer}, ch::Integer, σ = identity; init = glorot_uniform,
-  stride = 1, pad = 0, dilation = 1) where N =
-  DepthwiseConv(param(init(k..., 1, ch)), param(zeros(ch)), σ,
-  stride = stride, pad = pad, dilation=dilation)
-
-DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform,
-  stride::NTuple{N,Integer} = map(_->1,k),
-  pad::NTuple{N,Integer} = map(_->0,2 .* k),
-  dilation::NTuple{N,Integer} = map(_->1,k)) where N =
-  DepthwiseConv(param(init(k..., ch[2], ch[1])), param(zeros(ch[2]*ch[1])), σ,
-  stride = stride, pad = pad)
+function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+                       init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N
+  @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels"
+  return DepthwiseConv(
+    param(init(k..., div(ch[2], ch[1]), ch[1])),
+    param(zeros(ch[2])),
+    σ;
+    stride = stride,
+    pad = pad,
+    dilation = dilation
+  )
+end

 @treelike DepthwiseConv

@@ -187,8 +187,8 @@ function (c::DepthwiseConv)(x)
 end

 function Base.show(io::IO, l::DepthwiseConv)
-  print(io, "DepthwiseConv(", size(l.weight)[1:ndims(l.weight)-2])
-  print(io, ", ", size(l.weight, ndims(l.weight)), "=>", size(l.weight, ndims(l.weight)-1))
+  print(io, "DepthwiseConv(", size(l.weight)[1:end-2])
+  print(io, ", ", size(l.weight)[end], "=>", prod(size(l.weight)[end-1:end]))
   l.σ == identity || print(io, ", ", l.σ)
   print(io, ")")
 end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 2b9b04e2d4..5e12e42668 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -39,20 +39,14 @@ end

 @testset "Depthwise Conv" begin
   r = zeros(Float32, 28, 28, 3, 5)
-  m1 = DepthwiseConv((2, 2), 3=>5)
+  m1 = DepthwiseConv((2, 2), 3=>15)
   @test size(m1(r), 3) == 15

-  m2 = DepthwiseConv((2, 2), 3)
-  @test size(m2(r), 3) == 3
-  x = zeros(Float64, 28, 28, 3, 5)
-
-  m3 = DepthwiseConv((2, 2), 3 => 5)
-
-  @test size(m3(r), 3) == 15
-
-  m4 = DepthwiseConv((2, 2), 3)
-
-  @test size(m4(r), 3) == 3
+  m3 = DepthwiseConv((2, 3), 3=>9)
+  @test size(m3(r), 3) == 9
+
+  # Test that we cannot ask for non-integer multiplication factors
+  @test_throws AssertionError DepthwiseConv((2,2), 3=>10)
 end

 @testset "ConvTranspose" begin
diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl
index 8bc3d1cde8..bfb46cf63d 100644
--- a/test/layers/normalisation.jl
+++ b/test/layers/normalisation.jl
@@ -252,7 +252,6 @@ end
   @test !m.active

   x′ = m(x).data
-  println(x′[1])
   @test isapprox(x′[1], (1 - 0.95) / sqrt(1.25 + 1f-5), atol = 1.0e-5)
 end
 # with activation function
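For reference, a minimal usage sketch of the new `in => out` API that this diff introduces. The layer sizes and input shape below are illustrative only (chosen to mirror the updated tests) and are not part of the patch itself:

```julia
using Flux

# New API: the second argument is `in => out`, where `out` must be an
# integer multiple of `in` (the old form was `in => multiplier`).
m = DepthwiseConv((2, 2), 3 => 15)

# WHCN data layout: 28×28 images, 3 channels, batch of 5.
x = zeros(Float32, 28, 28, 3, 5)
size(m(x))  # (27, 27, 15, 5), i.e. 15 output channels

# Asking for a non-integer channel multiple is rejected by the constructor:
# DepthwiseConv((2, 2), 3 => 10)  # throws AssertionError, since 3 does not divide 10
```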