Skip to content

Commit

Permalink
Fix return dtype in MVDR module (#2376)
Browse files Browse the repository at this point in the history
Summary:
Address #2375
The MVDR module internally casts complex tensors to `torch.complex128` for numerical stability during computation, and is meant to cast the result back to the original dtype before returning. However, the cast back was silently dropped: `Tensor.to()` returns a new tensor rather than modifying the tensor in place, so the line `specgram_enhanced.to(dtype)` discarded its result. It should read `specgram_enhanced = specgram_enhanced.to(dtype)`. Fix it so the output dtype is consistent with the original input dtype.

Pull Request resolved: #2376

Reviewed By: hwangjeff

Differential Revision: D36280851

Pulled By: nateanl

fbshipit-source-id: 553d1b98f899547209a4e3ebc59920c7ef1f3112
  • Loading branch information
nateanl authored and facebook-github-bot committed May 10, 2022
1 parent eab2f39 commit 2f4eb4a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
17 changes: 17 additions & 0 deletions test/torchaudio_unittest/transforms/transforms_test_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,20 @@ def test_psd(self, duration, channel, mask, multi_mask):
psd_np = psd_numpy(spectrogram.detach().numpy(), mask, multi_mask)
psd = transform(spectrogram, mask)
self.assertEqual(psd, psd_np, atol=1e-5, rtol=1e-5)

@parameterized.expand(
    [
        param(torch.complex64),
        param(torch.complex128),
    ]
)
def test_mvdr(self, dtype):
    """Verify that the MVDR output dtype matches the input spectrogram dtype."""
    transform = T.MVDR()
    noise = get_whitenoise(sample_rate=8000, duration=0.5, n_channels=3)
    # (channel, freq, time) complex spectrogram, cast to the dtype under test
    specgram = get_spectrogram(noise, n_fft=400).to(dtype)
    freq_time = specgram.shape[-2:]
    mask_speech = torch.rand(freq_time)
    mask_noise = torch.rand(freq_time)
    enhanced = transform(specgram, mask_speech, mask_noise)
    assert enhanced.dtype == dtype
3 changes: 1 addition & 2 deletions torchaudio/transforms/_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2087,8 +2087,7 @@ def forward(
# unpack batch
specgram_enhanced = specgram_enhanced.reshape(shape[:-3] + shape[-2:])

specgram_enhanced.to(dtype)
return specgram_enhanced
return specgram_enhanced.to(dtype)


class RTFMVDR(torch.nn.Module):
Expand Down

0 comments on commit 2f4eb4a

Please sign in to comment.