diff --git a/edenai_apis/apis/amazon/amazon_audio_api.py b/edenai_apis/apis/amazon/amazon_audio_api.py index ef695a7b..247abae0 100644 --- a/edenai_apis/apis/amazon/amazon_audio_api.py +++ b/edenai_apis/apis/amazon/amazon_audio_api.py @@ -1,18 +1,20 @@ +import base64 import json import urllib import uuid -import base64 -import requests from io import BufferedReader, BytesIO -from botocore.exceptions import BotoCoreError, ClientError from pathlib import Path from typing import Optional +import requests +from botocore.exceptions import BotoCoreError, ClientError + from edenai_apis.apis.amazon.helpers import ( generate_right_ssml_text, get_right_audio_support_and_sampling_rate, handle_amazon_call, ) +from edenai_apis.features.audio import TextToSpeechAsyncDataClass from edenai_apis.features.audio.audio_interface import AudioInterface from edenai_apis.features.audio.speech_to_text_async.speech_to_text_async_dataclass import ( SpeechDiarization, @@ -22,9 +24,9 @@ from edenai_apis.features.audio.text_to_speech.text_to_speech_dataclass import ( TextToSpeechDataClass, ) -from edenai_apis.utils.exception import ( - ProviderException, -) +from edenai_apis.loaders.data_loader import ProviderDataEnum +from edenai_apis.loaders.loaders import load_provider +from edenai_apis.utils.exception import ProviderException from edenai_apis.utils.ssml import is_ssml from edenai_apis.utils.types import ( AsyncBaseResponseType, @@ -34,18 +36,15 @@ ResponseType, ) from edenai_apis.utils.upload_s3 import ( - upload_file_bytes_to_s3, - USER_PROCESS, - get_s3_file_url, URL_LONG_PERIOD, + USER_PROCESS, get_cloud_front_file_url, + get_s3_file_url, s3_client_load, + upload_file_bytes_to_s3, ) from .config import audio_voices_ids, storage_clients -from edenai_apis.features.audio import TextToSpeechAsyncDataClass -from edenai_apis.loaders.data_loader import ProviderDataEnum -from edenai_apis.loaders.loaders import load_provider class AmazonAudioApi(AudioInterface): @@ -223,7 +222,12 @@ def audio__speech_to_text_async__launch_job( ) self._launch_transcribe( - filename, frame_rate, language, speakers, format=export_format + filename, + frame_rate, + language, + speakers, + format=export_format, + provider_params=provider_params, ) return AsyncLaunchJobResponseType(provider_job_id=filename) @@ -293,6 +297,8 @@ def audio__speech_to_text_async__get_job_result( ] with urllib.request.urlopen(json_res) as url: original_response = json.loads(url.read().decode("utf-8")) + # add metadata to the response (settings, result/subtitle urls etc.) + original_response.update(job_details) # diarization diarization_entries = [] words_info = original_response["results"]["items"]