diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index eef992c5..b4685c0c 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -44,16 +44,16 @@ jobs:
- name: Install Python dependencies (Ubuntu, <=3.12)
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
run: |
- python -m pip install .[dev,audio,pocketsphinx,whisper-local,whisper-api]
+ python -m pip install .[dev,audio,pocketsphinx,whisper-local,whisper-api,groq]
- name: Install Python dependencies (Ubuntu, 3.13)
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
run: |
python -m pip install standard-aifc setuptools
- python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,whisper-api]
+ python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,whisper-api,groq]
- name: Install Python dependencies (Windows)
if: matrix.os == 'windows-latest'
run: |
- python -m pip install .[dev,whisper-local,whisper-api]
+ python -m pip install .[dev,whisper-local,whisper-api,groq]
- name: Test with unittest
run: |
pytest --doctest-modules -v speech_recognition/recognizers/ tests/
diff --git a/README.rst b/README.rst
index 51809ddf..337f6873 100644
--- a/README.rst
+++ b/README.rst
@@ -39,7 +39,8 @@ Speech recognition engine/API support:
* `Tensorflow `__
* `Vosk API `__ (works offline)
* `OpenAI whisper `__ (works offline)
-* `Whisper API `__
+* `OpenAI Whisper API <https://platform.openai.com/docs/guides/speech-to-text>`__
+* `Groq Whisper API <https://console.groq.com/docs/speech-text>`__
**Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
@@ -96,7 +97,8 @@ To use all of the functionality of the library, you should have:
* **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
* **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``)
* **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``)
-* **openai** (required only if you need to use Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``)
+* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``)
+* **groq** (required only if you need to use Groq Whisper API speech recognition ``recognizer_instance.recognize_groq``)
The following requirements are optional, but can improve or extend functionality in some situations:
@@ -171,15 +173,24 @@ Whisper is **required if and only if you want to use whisper** (``recognizer_ins
You can install it with ``python3 -m pip install SpeechRecognition[whisper-local]``.
-Whisper API (for Whisper API users)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+OpenAI Whisper API (for OpenAI Whisper API users)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The library `openai `__ is **required if and only if you want to use Whisper API** (``recognizer_instance.recognize_whisper_api``).
+The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_whisper_api``).
If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_whisper_api`` will raise an ``RequestError``.
You can install it with ``python3 -m pip install SpeechRecognition[whisper-api]``.
+Groq Whisper API (for Groq Whisper API users)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The library `groq <https://pypi.org/project/groq/>`__ is **required if and only if you want to use Groq Whisper API** (``recognizer_instance.recognize_groq``).
+
+If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_groq`` will raise an ``RequestError``.
+
+You can install it with ``python3 -m pip install SpeechRecognition[groq]``.
+
Troubleshooting
---------------
diff --git a/setup.cfg b/setup.cfg
index 2124911a..dbba0147 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,6 +4,7 @@ dev =
rstcheck
pytest
pytest-randomly
+ respx
audio =
PyAudio >= 0.2.11
pocketsphinx =
@@ -13,5 +14,9 @@ whisper-local =
soundfile
whisper-api =
openai
+ httpx < 0.28
+groq =
+ groq
+ httpx < 0.28
assemblyai =
requests
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 253ab0fe..238d5e50 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1506,12 +1506,13 @@ def flush(self, *args, **kwargs):
# At this time, the dependencies are not yet installed, resulting in a ModuleNotFoundError.
# This is a workaround to resolve this issue
try:
- from .recognizers import google, whisper
+ from .recognizers import google, groq, whisper
except (ModuleNotFoundError, ImportError):
pass
else:
Recognizer.recognize_google = google.recognize_legacy
Recognizer.recognize_whisper_api = whisper.recognize_whisper_api
+ Recognizer.recognize_groq = groq.recognize_groq
# ===============================
diff --git a/speech_recognition/recognizers/groq.py b/speech_recognition/recognizers/groq.py
new file mode 100644
index 00000000..b36822f2
--- /dev/null
+++ b/speech_recognition/recognizers/groq.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+import os
+from typing import Literal, TypedDict
+from typing_extensions import Unpack
+
+from speech_recognition.audio import AudioData
+from speech_recognition.exceptions import SetupError
+from speech_recognition.recognizers.whisper_api import (
+ OpenAICompatibleRecognizer,
+)
+
+# https://console.groq.com/docs/speech-text#supported-models
+GroqModel = Literal[
+ "whisper-large-v3-turbo", "whisper-large-v3", "distil-whisper-large-v3-en"
+]
+
+
+class GroqOptionalParameters(TypedDict):
+ """Groq speech transcription's optional parameters.
+
+ https://console.groq.com/docs/speech-text#transcription-endpoint-usage
+ """
+
+ prompt: str
+ response_format: str
+ temperature: float
+ language: str
+
+
+def recognize_groq(
+    recognizer,
+    audio_data: "AudioData",
+    *,
+    model: GroqModel = "whisper-large-v3-turbo",
+    **kwargs: Unpack[GroqOptionalParameters],
+) -> str:
+    """
+    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Groq Whisper API.
+
+    This function requires login to Groq; visit https://console.groq.com/login, then generate API Key in `API Keys <https://console.groq.com/keys>`__ menu.
+
+    Detail: https://console.groq.com/docs/speech-text
+
+    Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the groq installation, or the environment variable is missing.
+    """
+    if os.environ.get("GROQ_API_KEY") is None:
+        raise SetupError("Set environment variable ``GROQ_API_KEY``")
+
+    try:
+        import groq
+    except ImportError:
+        raise SetupError(
+            "missing groq module: ensure that groq is set up correctly."
+        )
+
+    groq_recognizer = OpenAICompatibleRecognizer(groq.Groq())
+    return groq_recognizer.recognize(audio_data, model, **kwargs)
diff --git a/speech_recognition/recognizers/whisper_api.py b/speech_recognition/recognizers/whisper_api.py
new file mode 100644
index 00000000..c435ef59
--- /dev/null
+++ b/speech_recognition/recognizers/whisper_api.py
@@ -0,0 +1,22 @@
+from io import BytesIO
+
+from speech_recognition.audio import AudioData
+
+
+class OpenAICompatibleRecognizer:
+ def __init__(self, client) -> None:
+ self.client = client
+
+ def recognize(self, audio_data: "AudioData", model: str, **kwargs) -> str:
+ if not isinstance(audio_data, AudioData):
+ raise ValueError(
+ "``audio_data`` must be an ``AudioData`` instance"
+ )
+
+ wav_data = BytesIO(audio_data.get_wav_data())
+ wav_data.name = "SpeechRecognition_audio.wav"
+
+ transcript = self.client.audio.transcriptions.create(
+ file=wav_data, model=model, **kwargs
+ )
+ return transcript.text
diff --git a/tests/recognizers/test_groq.py b/tests/recognizers/test_groq.py
new file mode 100644
index 00000000..c821eba5
--- /dev/null
+++ b/tests/recognizers/test_groq.py
@@ -0,0 +1,33 @@
+from unittest.mock import MagicMock
+
+import httpx
+import respx
+
+from speech_recognition import AudioData, Recognizer
+from speech_recognition.recognizers import groq
+
+
+@respx.mock(assert_all_called=True, assert_all_mocked=True)
+def test_transcribe_with_groq_whisper(respx_mock, monkeypatch):
+ monkeypatch.setenv("GROQ_API_KEY", "gsk_grok_api_key")
+
+ respx_mock.post(
+ "https://api.groq.com/openai/v1/audio/transcriptions"
+ ).mock(
+ return_value=httpx.Response(
+ 200,
+ json={
+ "text": "Transcription by Groq Whisper",
+ "x_groq": {"id": "req_unique_id"},
+ },
+ )
+ )
+
+ audio_data = MagicMock(spec=AudioData)
+ audio_data.get_wav_data.return_value = b"audio_data"
+
+ actual = groq.recognize_groq(
+ MagicMock(spec=Recognizer), audio_data, model="whisper-large-v3"
+ )
+
+ assert actual == "Transcription by Groq Whisper"