
Commit

Merge branch 'voice-mode'
xinydev committed Aug 19, 2023
2 parents 150b25d + 60c8144 commit 22ff73d
Showing 3 changed files with 122 additions and 2 deletions.
20 changes: 19 additions & 1 deletion gptw/gptw.py
@@ -8,6 +8,7 @@
import openai

import gptw
from gptw.voice import chat_in_audio


def args_init():
@@ -47,6 +48,14 @@ def args_init():
        help="list all available sub cmds",
    )

    parser.add_argument(
        "--voice",
        dest="voice",
        action="store_true",
        default=False,
        help="chat with gpt in voice mode",
    )

    parser.add_argument(
        "-d",
        "--debug",
@@ -112,7 +121,7 @@ def get_config(key, default_value=None):
            return default_value
        return CFG[key]
    except Exception:
-        print("config not found, run `ww --config` to set it")
+        print(f"config {key} not found, run `ww --config` to set it")
        sys.exit(1)


@@ -198,6 +207,15 @@ def main():
        set_config(k, v)
        exit(0)

    if args.voice:
        token = get_config("azure-token")
        endpoint = get_config("azure-endpoint")
        depname = get_config("azure-depname")
        tts_key = get_config("tts-key")
        tts_region = get_config("tts-region")
        chat_in_audio(token, endpoint, depname, tts_key, tts_region)
        exit(0)

    prompts = get_prompts()
    logging.debug(f"configs:{prompts}")

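Note: a minimal sketch of how the new voice path could be exercised directly, outside the `ww` CLI. The five config keys mirror the ones read in main() above; all values shown are placeholders, not part of this commit.

# Hypothetical direct call to the new voice loop; every value below is a placeholder.
# In normal use, `ww --voice` reads these entries from the gptw config (`ww --config`).
from gptw.voice import chat_in_audio

chat_in_audio(
    token="<azure-openai-api-key>",                    # azure-token
    endpoint="https://<resource>.openai.azure.com/",   # azure-endpoint
    depname="<chat-deployment-name>",                  # azure-depname
    tts_key="<azure-speech-key>",                      # tts-key
    tts_region="<azure-speech-region>",                # tts-region
)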
102 changes: 102 additions & 0 deletions gptw/voice.py
@@ -0,0 +1,102 @@
import time

import azure.cognitiveservices.speech as speechsdk
import openai

ssml = """<speak xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:mstts="http://www.w3.org/2001/mstts"
xmlns:emo="http://www.w3.org/2009/10/emotionml"
version="1.0" xml:lang="en-US">
<voice name="en-US-JennyNeural"><s/>
<mstts:express-as style="friendly">
<prosody rate="+8.00%" pitch="+3.00%">
{}
</prosody>
</mstts:express-as><s/>
</voice></speak>"""


prompts = """
As an English language learning assistant,
your first task is to offer users a selection of engaging topics to choose from.
Next, you engage in conversation with the user.
For each user input, your primary task is to assess whether their grammar is correct and
whether it aligns with the conventions used by native speakers, and to provide feedback to the user.
Make sure to keep every response short and concise.
After providing the feedback, pose the next question related to the user's input.
"""

recognizer_results = []


def chat_in_audio(token, endpoint, depname, tts_key, tts_region):

    global recognizer_results

    # Set up OpenAI API credentials
    openai.api_key = token
    openai.api_base = endpoint
    openai.api_type = "azure"
    openai.api_version = "2023-05-15"

    # Set up Azure TTS API credentials
    speech_config = speechsdk.SpeechConfig(subscription=tts_key, region=tts_region)
    speech_config.output_format = speechsdk.OutputFormat.Detailed

    # Set up speech recognizer
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)

    # https://speech.microsoft.com/portal/voicegallery
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

    # Set up conversation loop
    messages = [
        {
            "role": "system",
            "content": prompts,
        },
    ]

    speech_recognizer.recognized.connect(recognized_callback)
    while True:
        # Get user input from microphone
        print("Speak then press Enter to continue, or type 'exit' to exit")

        speech_recognizer.start_continuous_recognition()

        x = input("...: ")
        if x == "exit":
            break

        speech_recognizer.stop_continuous_recognition_async().get()
        time.sleep(1)
        # Use speech recognizer to convert speech to text
        user_input = " ".join(recognizer_results)
        recognizer_results = []
        print("You said: " + user_input)
        messages.append({"role": "user", "content": user_input})
        completion = openai.ChatCompletion.create(
            engine=depname,
            messages=messages,
            temperature=0.5,
        )

        # Get response text
        response_text = str(completion.choices[0].message.content).strip()
        print("AI said: " + response_text)
        messages.append({"role": "assistant", "content": response_text})

        play_result = speech_synthesizer.speak_ssml_async(
            ssml.format(response_text)
        ).get()

        if play_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            print(f"Error synthesizing audio: {play_result.error_details}")

        time.sleep(0.5)


def recognized_callback(evt):
    global recognizer_results
    if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
        recognizer_results.append(evt.result.text)
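For reference: the recognition side of the loop uses continuous recognition plus a recognized callback that accumulates finalized utterances until the user presses Enter. A standalone sketch of that pattern with the same Speech SDK calls used above (the key and region are placeholders):

import time

import azure.cognitiveservices.speech as speechsdk

segments = []


def on_recognized(evt):
    # Collect each finalized utterance as it is recognized.
    if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
        segments.append(evt.result.text)


config = speechsdk.SpeechConfig(subscription="<speech-key>", region="<region>")
recognizer = speechsdk.SpeechRecognizer(speech_config=config)
recognizer.recognized.connect(on_recognized)

recognizer.start_continuous_recognition()
time.sleep(5)  # speak into the microphone during this window
recognizer.stop_continuous_recognition_async().get()

print(" ".join(segments))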
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -18,7 +18,7 @@ keywords = [
]
license = "Apache-2.0"
requires-python = ">=3.7.1"
dependencies = ["openai>=0.27.0"]
dependencies = ["openai>=0.27.0", "azure-cognitiveservices-speech"]
optional-dependencies.testing = ["coverage==5.5", "pytest==7.1.3"]
classifiers = [
"Development Status :: 3 - Alpha",
