Uberi · ftnext · Jan 9, 2023 · Jan 9, 2023 · Jan 9, 2023 · Jan 9, 2023
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -31,7 +31,7 @@ jobs:
       - name: Install Python dependencies
         run: |
           python -m pip install 'pocketsphinx<5'
-          python -m pip install git+https://github.com/openai/whisper.git
+          python -m pip install git+https://github.com/openai/whisper.git soundfile
           python -m pip install .
       - name: Test with unittest
         run: |

diff --git a/README.rst b/README.rst
@@ -179,7 +179,7 @@ Whisper (for Whisper users)
 ~~~~~~~~~~~~~~~~~~~~~
 Whisper is **required if and only if you want to use whisper** (``recognizer_instance.recognize_whisper``).
 
-You can install it with ``python3 -m pip install git+https://github.com/openai/whisper.git``.
+You can install it with ``python3 -m pip install git+https://github.com/openai/whisper.git soundfile``.
 
 Troubleshooting
 ---------------

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
@@ -1684,23 +1684,28 @@ def recognize_whisper(self, audio_data, model="base", show_dict=False, load_opti
         """
 
         assert isinstance(audio_data, AudioData), "Data must be audio data"
+        import numpy as np
+        import soundfile as sf
         import torch
         import whisper
 
         if load_options or not hasattr(self, "whisper_model") or self.whisper_model.get(model) is None:
             self.whisper_model = getattr(self, "whisper_model", {})
             self.whisper_model[model] = whisper.load_model(model, **load_options or {})
 
-        with tempfile.NamedTemporaryFile(suffix=".wav") as f:
-            f.write(audio_data.get_wav_data())
-            f.flush()
-            result = self.whisper_model[model].transcribe(
-                f.name,
-                language=language,
-                task="translate" if translate else None,
-                fp16=torch.cuda.is_available(),
-                **transcribe_options
-            )
+        # Whisper expects waveform input sampled at 16 kHz, so resample before decoding; see https://github.com/openai/whisper/blob/28769fcfe50755a817ab922a7bc83483159600a9/whisper/audio.py#L98-L99
+        wav_bytes = audio_data.get_wav_data(convert_rate=16000)
+        wav_stream = io.BytesIO(wav_bytes)
+        audio_array, sampling_rate = sf.read(wav_stream)
+        audio_array = audio_array.astype(np.float32)
+
+        result = self.whisper_model[model].transcribe(
+            audio_array,
+            language=language,
+            task="translate" if translate else None,
+            fp16=torch.cuda.is_available(),
+            **transcribe_options
+        )
 
         if show_dict:
             return result