From cd031eba25e314853d64f0777a6dec410365ff51 Mon Sep 17 00:00:00 2001 From: ftnext Date: Sun, 8 Dec 2024 20:57:18 +0900 Subject: [PATCH 1/5] [feat] Only openai is responsible for OPENAI_API_KEY --- speech_recognition/recognizers/openai.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/speech_recognition/recognizers/openai.py b/speech_recognition/recognizers/openai.py index 79843d69..faa38fad 100644 --- a/speech_recognition/recognizers/openai.py +++ b/speech_recognition/recognizers/openai.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os from typing import Literal from typing_extensions import Unpack @@ -34,7 +33,6 @@ def recognize( audio_data: "AudioData", *, model: WhisperModel = "whisper-1", - api_key: str | None = None, **kwargs: Unpack[OpenAIOptionalParameters], ) -> str: """ @@ -44,11 +42,8 @@ def recognize( Detail: https://platform.openai.com/docs/guides/speech-to-text - Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing. + Set environment variable ``OPENAI_API_KEY``; otherwise the openai library will raise an ``openai.OpenAIError``. """ - if api_key is None and os.environ.get("OPENAI_API_KEY") is None: - raise SetupError("Set environment variable ``OPENAI_API_KEY``") - try: import openai except ImportError: @@ -56,5 +51,5 @@ def recognize( "missing openai module: ensure that openai is set up correctly." 
) - recognizer = OpenAICompatibleRecognizer(openai.OpenAI(api_key=api_key)) + recognizer = OpenAICompatibleRecognizer(openai.OpenAI()) return recognizer.recognize(audio_data, model, **kwargs) From 64fe53339d235557efc83a2bc17536d1c24bae5e Mon Sep 17 00:00:00 2001 From: ftnext Date: Sun, 8 Dec 2024 20:57:47 +0900 Subject: [PATCH 2/5] [docs] Tweak OpenAI example --- examples/microphone_recognition.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py index 38ef95bd..a4f10a9b 100644 --- a/examples/microphone_recognition.py +++ b/examples/microphone_recognition.py @@ -2,6 +2,8 @@ # NOTE: this example requires PyAudio because it uses the Microphone class +import os + import speech_recognition as sr # obtain audio from the microphone @@ -95,7 +97,8 @@ # recognize speech using Whisper API OPENAI_API_KEY = "INSERT OPENAI API KEY HERE" +os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY try: - print(f"Whisper API thinks you said {r.recognize_openai(audio, api_key=OPENAI_API_KEY)}") + print(f"OpenAI Whisper API thinks you said {r.recognize_openai(audio)}") except sr.RequestError as e: - print(f"Could not request results from Whisper API; {e}") + print(f"Could not request results from OpenAI Whisper API; {e}") From 8b77fb5c69127c2b9f441a5eab614b64a4e425e9 Mon Sep 17 00:00:00 2001 From: ftnext Date: Sun, 8 Dec 2024 20:58:23 +0900 Subject: [PATCH 3/5] [docs] See openai.recognize()'s docstring (for future) --- reference/library-reference.rst | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/reference/library-reference.rst b/reference/library-reference.rst index db244db7..41e6187a 100644 --- a/reference/library-reference.rst +++ b/reference/library-reference.rst @@ -314,16 +314,10 @@ You can translate the result to english with Whisper by passing translate=True Other values are passed directly to whisper. 
See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options -``recognizer_instance.recognize_openai(audio_data: AudioData, model: str = "whisper-1", api_key: str | None = None)`` ---------------------------------------------------------------------------------------------------------------------- - -Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API. - -This function requires an OpenAI account; visit https://platform.openai.com/signup, then generate API Key in `User settings `__. - -Detail: https://platform.openai.com/docs/guides/speech-to-text +``recognizer_instance.recognize_openai(audio_data: AudioData, model = "whisper-1", **kwargs)`` +---------------------------------------------------------------------------------------------- -Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing. +.. autofunction:: speech_recognition.recognizers.openai.recognize ``recognizer_instance.recognize_groq(audio_data: AudioData, model = "whisper-large-v3-turbo", **kwargs)`` --------------------------------------------------------------------------------------------------------- From ded2b421720f3afd4455723bbd4401619683cd04 Mon Sep 17 00:00:00 2001 From: ftnext Date: Sun, 8 Dec 2024 20:59:30 +0900 Subject: [PATCH 4/5] [docs] Tell setting OPENAI_API_KEY --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 4e4edd8c..5fc4067d 100644 --- a/README.rst +++ b/README.rst @@ -178,10 +178,10 @@ OpenAI Whisper API (for OpenAI Whisper API users) The library `openai `__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_openai``). -If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_openai`` will raise an ``RequestError``. 
- You can install it with ``python3 -m pip install SpeechRecognition[whisper-api]``. +Please set the environment variable ``OPENAI_API_KEY`` before calling ``recognizer_instance.recognize_openai``. + Groq Whisper API (for Groq Whisper API users) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -189,7 +189,7 @@ The library `groq `__ is **required if and only You can install it with ``python3 -m pip install SpeechRecognition[groq]``. -Please set the environment variable ``GROQ_API_KEY`` before calling ``recognizer_instance.recognize_groq`` +Please set the environment variable ``GROQ_API_KEY`` before calling ``recognizer_instance.recognize_groq``. Troubleshooting --------------- From b25fd600985af9a6fafc6e351b9bdee20805c704 Mon Sep 17 00:00:00 2001 From: ftnext Date: Sun, 8 Dec 2024 21:01:16 +0900 Subject: [PATCH 5/5] [refactor] Rename extra whisper-api -> openai --- .github/workflows/unittests.yml | 6 +++--- README.rst | 2 +- setup.cfg | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index b4685c0c..8b2a6daa 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -44,16 +44,16 @@ jobs: - name: Install Python dependencies (Ubuntu, <=3.12) if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13' run: | - python -m pip install .[dev,audio,pocketsphinx,whisper-local,whisper-api,groq] + python -m pip install .[dev,audio,pocketsphinx,whisper-local,openai,groq] - name: Install Python dependencies (Ubuntu, 3.13) if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13' run: | python -m pip install standard-aifc setuptools - python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,whisper-api,groq] + python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,openai,groq] - name: Install Python dependencies (Windows) if: matrix.os == 'windows-latest' run: | - python -m pip install .[dev,whisper-local,whisper-api,groq] + 
python -m pip install .[dev,whisper-local,openai,groq] - name: Test with unittest run: | pytest --doctest-modules -v speech_recognition/recognizers/ tests/ diff --git a/README.rst b/README.rst index 5fc4067d..0052ccf4 100644 --- a/README.rst +++ b/README.rst @@ -178,7 +178,7 @@ OpenAI Whisper API (for OpenAI Whisper API users) The library `openai `__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_openai``). -You can install it with ``python3 -m pip install SpeechRecognition[whisper-api]``. +You can install it with ``python3 -m pip install SpeechRecognition[openai]``. Please set the environment variable ``OPENAI_API_KEY`` before calling ``recognizer_instance.recognize_openai``. diff --git a/setup.cfg b/setup.cfg index dbba0147..8de63858 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ pocketsphinx = whisper-local = openai-whisper soundfile -whisper-api = +openai = openai httpx < 0.28 groq =