Merge branch 'master' into fix/spellcheck

# Conflicts:
#	edenai_apis/apis/cohere/cohere_api.py
edenai · Nov 15, 2023 · 5132879 · 5132879
2 parents a49f0a3 + 68efc93
commit 5132879
Show file tree

Hide file tree

Showing 11 changed files with 486 additions and 156 deletions.
diff --git a/AVAILABLES_FEATURES_AND_PROVIDERS.md b/AVAILABLES_FEATURES_AND_PROVIDERS.md
@@ -237,7 +237,8 @@
 | | lovoai |
 | | microsoft |
 | | openai |
-| **text_to_speech_async** | lovoai |
+| **text_to_speech_async** | amazon |
+| | lovoai |
 
 </details>
 <details><summary>translation</summary>
@@ -319,6 +320,7 @@
 |----------|-------------|
 | **audio** | speech_to_text_async |
 | | text_to_speech |
+| | text_to_speech_async |
 | **image** | explicit_content |
 | | face_compare |
 | | face_detection |

diff --git a/edenai_apis/apis/amazon/amazon_audio_api.py b/edenai_apis/apis/amazon/amazon_audio_api.py
@@ -1,9 +1,10 @@
 import json
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Literal
 import urllib
 import uuid
 import base64
+import requests
 from io import BufferedReader, BytesIO
 from botocore.exceptions import BotoCoreError, ClientError
 from edenai_apis.apis.amazon.helpers import (
@@ -36,9 +37,13 @@
     AsyncResponseType,
     ResponseType,
 )
-from edenai_apis.utils.upload_s3 import upload_file_bytes_to_s3, USER_PROCESS
+from edenai_apis.utils.upload_s3 import upload_file_bytes_to_s3, USER_PROCESS, get_s3_file_url, URL_LONG_PERIOD, \
+    get_cloud_front_file_url, s3_client_load
 
-from .config import audio_voices_ids
+from .config import audio_voices_ids, storage_clients
+from edenai_apis.features.audio import TextToSpeechAsyncDataClass
+from edenai_apis.loaders.data_loader import ProviderDataEnum
+from edenai_apis.loaders.loaders import load_provider
 
 
 class AmazonAudioApi(AudioInterface):
@@ -338,3 +343,76 @@ def audio__speech_to_text_async__get_job_result(
         return AsyncPendingResponseType[SpeechToTextAsyncDataClass](
             provider_job_id=provider_job_id
         )
+    def audio__text_to_speech_async__launch_job(
+            self,
+            language: str,
+            text: str,
+            option: str,
+            voice_id: str,
+            audio_format: str,
+            speaking_rate: int,
+            speaking_pitch: int,
+            speaking_volume: int,
+            sampling_rate: int,
+            file_url: str = ""
+    ) -> AsyncLaunchJobResponseType:
+        _, voice_id_name, engine = voice_id.split("_")
+        engine = engine.lower()
+
+        params = {"Engine": engine, "VoiceId": voice_id_name, "OutputFormat": "mp3", "OutputS3BucketName": self.api_settings["users_resource_bucket"]}
+
+        text = generate_right_ssml_text(
+            text, speaking_rate, speaking_pitch, speaking_volume
+        )
+
+        ext, audio_format, sampling = get_right_audio_support_and_sampling_rate(
+            audio_format, sampling_rate
+        )
+
+        params_update = {"OutputFormat": audio_format, "Text": text}
+        if sampling:
+            params_update["SampleRate"] = str(sampling)
+
+        params.update({**params_update})
+
+        if is_ssml(text):
+            params["TextType"] = "ssml"
+
+        response = handle_amazon_call(self.clients["texttospeech"].start_speech_synthesis_task, **params)
+        synthesis_task = response["SynthesisTask"]
+        if synthesis_task["TaskStatus"] == "failed":
+            raise ProviderException(synthesis_task.get("TaskStatusReason", "Amazon returned a job status: failed"))
+        print(synthesis_task["TaskId"])
+        return AsyncLaunchJobResponseType(provider_job_id=synthesis_task["TaskId"])
+
+    def audio__text_to_speech_async__get_job_result(
+        self,
+        provider_job_id: str
+    ) -> AsyncBaseResponseType[TextToSpeechAsyncDataClass]:
+        params = {"TaskId": provider_job_id}
+        response = handle_amazon_call(self.clients["texttospeech"].get_speech_synthesis_task, **params)
+        synthesis_task = response["SynthesisTask"]
+        status = synthesis_task["TaskStatus"]
+        if status == "failed":
+            raise ProviderException(synthesis_task.get("TaskStatusReason", "Amazon returned a job status: failed"))
+        elif status == "inProgress" or status == "scheduled":
+            return AsyncPendingResponseType[TextToSpeechAsyncDataClass](
+                provider_job_id=provider_job_id
+            )
+        else:
+            output_uri = synthesis_task.get("OutputUri", "")
+            s3_client_load()
+            file_url = get_cloud_front_file_url(output_uri.split('/')[-1], URL_LONG_PERIOD)
+            synthesis_task["OutputUri"] = file_url
+            response_file = requests.get(file_url)
+            print(response_file.content)
+            audio_content = BytesIO(response_file.content)
+            audio = base64.b64encode(audio_content.read()).decode("utf-8")
+            standardized_response = TextToSpeechAsyncDataClass(
+                audio_resource_url=file_url, audio=audio, voice_type=1
+            )
+            return AsyncResponseType[TextToSpeechAsyncDataClass](
+                original_response=synthesis_task,
+                standardized_response=standardized_response,
+                provider_job_id=provider_job_id
+            )
diff --git a/edenai_apis/apis/amazon/config.py b/edenai_apis/apis/amazon/config.py
@@ -12,7 +12,7 @@ def clients(api_settings: Dict) -> Dict:
         ),
         "texttospeech": boto3.client(
             "polly",
-            region_name=api_settings["region_name"],
+            region_name=api_settings["ressource_region"],
             aws_access_key_id=api_settings["aws_access_key_id"],
             aws_secret_access_key=api_settings["aws_secret_access_key"],
         ),
@@ -89,6 +89,12 @@ def storage_clients(api_settings: Dict) -> Dict:
             aws_access_key_id=api_settings["aws_access_key_id"],
             aws_secret_access_key=api_settings["aws_secret_access_key"],
         ),
+        "texttospeech": boto3.resource(
+            "s3",
+            region_name=api_settings["ressource_region"],
+            aws_access_key_id=api_settings["aws_access_key_id"],
+            aws_secret_access_key=api_settings["aws_secret_access_key"],
+        )
     }
 
 

diff --git a/edenai_apis/apis/amazon/info.json b/edenai_apis/apis/amazon/info.json
@@ -208,6 +208,163 @@
         }
       },
       "version": "boto3 (v1.15.18)"
+    },
+    "text_to_speech_async": {
+      "constraints": {
+        "languages": [
+          "arb",
+          "en-US",
+          "fr-FR",
+          "es-ES",
+          "de-DE",
+          "en-GB",
+          "nl-NL",
+          "it-IT",
+          "ja-JP",
+          "cmn-CN",
+          "ru-RU",
+          "pt-BR",
+          "da-DK",
+          "ko-KR",
+          "pt-PT",
+          "en-AU",
+          "en-IN",
+          "hi-IN",
+          "is-IS",
+          "nb-NO",
+          "pl-PL",
+          "ro-RO",
+          "es-MX",
+          "es-US",
+          "sv-SE",
+          "tr-TR",
+          "cy-GB"
+        ],
+        "audio_format": [
+          "mp3",
+          "ogg",
+          "pcm"
+        ],
+        "voice_ids": {
+          "MALE": [
+            "da-DK_Mads_Standard",
+            "nl-NL_Ruben_Standard",
+            "en-AU_Russell_Neural",
+            "en-GB_Brian_Standard",
+            "en-GB_Brian_Neural",
+            "en-GB_Arthur_Neural",
+            "en-US_Joey_Standard",
+            "en-US_Joey_Neural",
+            "en-US_Justin_Standard",
+            "en-US_Justin_Neural",
+            "en-US_Kevin_Neural",
+            "en-US_Matthew_Standard",
+            "en-US_Matthew_Neural",
+            "en-US_Stephen_Neural",
+            "fr-FR_Mathieu_Standard",
+            "fr-FR_Remi_Neural",
+            "fr-CA_Liam_Neural",
+            "de-DE_Hans_Standard",
+            "is-IS_Karl_Standard",
+            "de-DE_Daniel_Neural",
+            "is-IS_Karl_Neural",
+            "it-IT_Giorgio_Standard",
+            "it-IT_Adriano_Neural",
+            "ja-JP_Takumi_Standard",
+            "ja-JP_Takumi_Neural",
+            "pl-PL_Jacek_Standard",
+            "pl-PL_Jan_Standard",
+            "pt-BR_Ricardo_Standard",
+            "pt-BR_Thiago_Neural",
+            "pt-PT_Cristiano_Standard",
+            "ru-RU_Maxim_Standard",
+            "es-ES_Enrique_Standard",
+            "es-ES_Sergio_Neural",
+            "es-MX_Andres_Neural",
+            "es-US_Miguel_Standard",
+            "es-US_Pedro_Neural"
+          ],
+          "FEMALE": [
+            "arb_Zeina_Standard",
+            "ar-AE_Hala_Neural",
+            "ca-ES_Arlet_Neural",
+            "yue-CN_Hiujin_Neural",
+            "cmn-CN_Zhiyu_Standard",
+            "cmn-CN_Zhiyu_Neural",
+            "da-DK_Naja_Standard",
+            "nl-NL_Laura_Neural",
+            "nl-NL_Lotte_Standard",
+            "en-AU_Nicole_Neural",
+            "en-AU_Olivia_Standard",
+            "en-GB_Amy_Standard",
+            "en-GB_Amy_Neural",
+            "en-GB_Emma_Standard",
+            "en-GB_Emma_Neural",
+            "en-IN_Aditi_Standard",
+            "en-IN_Raveena_Standard",
+            "en-IN_Kajal_Neural",
+            "en-NZ_Aria_Neural",
+            "en-ZA_Ayanda_Neural",
+            "en-US_Ivy_Standard",
+            "en-US_Ivy_Neural",
+            "en-US_Joanna_Standard",
+            "en-US_Joanna_Neural",
+            "en-US_Kendra_Standard",
+            "en-US_Kendra_Neural",
+            "en-US_Kimberly_Standard",
+            "en-US_Kimberly_Neural",
+            "en-US_Salli_Standard",
+            "en-US_Salli_Neural",
+            "en-US_Ruth_Neural",
+            "fi-FI_Suvi_Neural",
+            "fr-FR_Celine_Standard",
+            "fr-FR_Lea_Standard",
+            "fr-FR_Lea_Neural",
+            "fr-CA_Chantal_Standard",
+            "fr-CA_Gabrielle_Neural",
+            "de-DE_Marlene_Standard",
+            "de-DE_Vicki_Standard",
+            "de-DE_Vicki_Neural",
+            "de-AT_Hannah_Neural",
+            "hi-IN_Aditi_Standard",
+            "hi-IN_Kajal_Neural",
+            "is-IS_Dora_Standard",
+            "it-IT_Carla_Standard",
+            "it-IT_Bianca_Standard",
+            "it-IT_Bianca_Neural",
+            "ja-JP_Mizuki_Standard",
+            "ja-JP_Kazuha_Neural",
+            "ja-JP_Tomoko_Neural",
+            "ko-KR_Seoyeon_Standard",
+            "ko-KR_Seoyeon_Neural",
+            "nb-NO_Liv_Standard",
+            "pl-PL_Ewa_Standard",
+            "pl-PL_Maja_Standard",
+            "pl-PL_Ola_Neural",
+            "pt-BR_Camila_Standard",
+            "pt-BR_Camila_Neural",
+            "pt-BR_Vitoria_Standard",
+            "pt-BR_Vitoria_Neural",
+            "pt-PT_Ines_Standard",
+            "pt-PT_Ines_Neural",
+            "ro-RO_Carmen_Standard",
+            "ru-RU_Tatyana_Standard",
+            "es-ES_Conchita_Standard",
+            "es-ES_Lucia_Standard",
+            "es-ES_Lucia_Neural",
+            "es-MX_Mia_Standard",
+            "es-MX_Mia_Neural",
+            "es-US_Lupe_Standard",
+            "es-US_Lupe_Neural",
+            "es-US_Penelope_Standard",
+            "sv-SE_Astrid_Standard",
+            "sv-SE_Elin_Neural",
+            "tr-TR_Filiz_Standard",
+            "cy-GB_Gwyneth_Standard"
+          ]
+        }
+      },
+      "version": "boto3 (v1.15.18)"
     }
   },
   "ocr": {
@@ -538,7 +695,7 @@
       "version": "boto3 (v1.15.18)"
     },
     "object_detection": {
-      "constraints":{
+      "constraints": {
       },
       "version": "boto3 (v1.15.18)"
     },