diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.yaml b/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.yaml
index 304fd154587..37dec3d89e4 100644
--- a/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.yaml
+++ b/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.yaml
@@ -20,7 +20,7 @@
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
-host: 127.0.0.1
+host: 0.0.0.0
 port: 8888
 
 model_name_or_path: "Intel/neural-chat-7b-v1-1"
diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/backend/textbot.yaml b/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/backend/textbot.yaml
index e027667db5d..0a02a8d50e8 100644
--- a/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/backend/textbot.yaml
+++ b/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/backend/textbot.yaml
@@ -20,7 +20,7 @@
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
-host: 127.0.0.1
+host: 0.0.0.0
 port: 8000
 
 model_name_or_path: "starcoder_int8"
diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py
index 3fe7294d0e8..eb86d6999ea 100644
--- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py
+++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/tts.py
@@ -201,6 +201,8 @@ def stream_text2speech(self, generator, output_audio_path, voice="default"):
 
     def post_llm_inference_actions(self, text_or_generator):
+        from intel_extension_for_transformers.neural_chat.plugins import plugins
+        self.voice = plugins.tts.args["voice"]
         if self.stream_mode:
             def cache_words_into_sentences():
                 buffered_texts = []
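The two YAML edits switch the bind address from loopback (127.0.0.1) to 0.0.0.0 so the talkingbot and textbot backends accept connections from other hosts, e.g. when running inside a container. The tts.py change makes the TTS plugin read the active voice from the global plugin registry at inference time rather than relying on a value fixed at construction. A minimal sketch of how a caller drives this, using the same registry import the patch adds; the voice name "male" is only an illustrative assumption:

# Sketch: selecting the voice consumed by post_llm_inference_actions().
# `plugins` is the registry imported in the patch above; the voice name
# "male" is an assumption for illustration.
from intel_extension_for_transformers.neural_chat.plugins import plugins

plugins.tts.args["voice"] = "male"
# Any subsequent chat request that triggers the TTS plugin will now
# synthesize speech with this voice.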
diff --git a/intel_extension_for_transformers/neural_chat/server/restful/voicechat_api.py b/intel_extension_for_transformers/neural_chat/server/restful/voicechat_api.py
index afa7fc4868f..d899d88f268 100644
--- a/intel_extension_for_transformers/neural_chat/server/restful/voicechat_api.py
+++ b/intel_extension_for_transformers/neural_chat/server/restful/voicechat_api.py
@@ -22,6 +22,7 @@
 from fastapi import File, UploadFile, Form
 from pydub import AudioSegment
 from ...config import GenerationConfig
+from ...plugins import plugins
 import base64
 import torch
@@ -47,9 +48,10 @@ def handle_voice_asr_request(self, filename: str) -> str:
         except Exception as e:
             raise Exception(e)
 
-    async def handle_voice_chat_request(self, prompt: str, audio_output_path: Optional[str]=None) -> str:
+    async def handle_voice_chat_request(self, prompt: str, voice: str, audio_output_path: Optional[str]=None) -> str:
         chatbot = self.get_chatbot()
         try:
+            plugins.tts.args["voice"] = voice
             config = GenerationConfig(audio_output_path=audio_output_path)
             result, link = chatbot.chat_stream(query=prompt, config=config)
             def audio_file_generate(result):
@@ -90,6 +92,7 @@ async def handle_talkingbot_asr(file: UploadFile = File(...)):
     audio = AudioSegment.from_file("tmp_audio_bytes")
     audio = audio.set_frame_rate(16000)
     # bytes to wav
+    file_name = file_name + '.wav'
     audio.export(f"{file_name}", format="wav")
     asr_result = router.handle_voice_asr_request(file_name)
     return {"asr_result": asr_result}
@@ -105,7 +108,7 @@ async def talkingbot(request: Request):
 
     logger.info(f'Received prompt: {text}, and use voice: {voice} knowledge_id: {knowledge_id}')
 
-    return await router.handle_voice_chat_request(text, audio_output_path)
+    return await router.handle_voice_chat_request(text, voice, audio_output_path)
 
 @router.post("/v1/talkingbot/create_embedding")
 async def create_speaker_embedding(file: UploadFile = File(...)):
@@ -120,5 +123,5 @@ async def create_speaker_embedding(file: UploadFile = File(...)):
     audio = AudioSegment.from_file(f"tmp_spk_{file_name}")
     audio.export(f"{spk_id}", format="mp3")
 
-    router.handle_create_speaker_embedding(spk_id)
+    await router.handle_create_speaker_embedding(spk_id)
     return {"spk_id": spk_id}
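End to end, the new voice field travels from the REST request body through handle_voice_chat_request, which stashes it in plugins.tts.args["voice"] before generation so the TTS stage picks it up; the last hunk also awaits handle_create_speaker_embedding, since the handler is a coroutine. A hypothetical client call exercising the flow — the URL path is an assumption (the patch does not show the @router.post decorator for `talkingbot`), while the JSON keys mirror the fields the handler logs:

# Hypothetical client for the updated talkingbot handler. The URL path is
# an assumption, not shown in the patch; port 8888 follows the YAML above,
# and the JSON keys match the fields logged by the handler.
import requests

resp = requests.post(
    "http://localhost:8888/v1/talkingbot",  # path assumed for illustration
    json={
        "text": "What is Intel Extension for Transformers?",
        "voice": "default",        # forwarded to plugins.tts.args["voice"]
        "knowledge_id": "default",
    },
)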