Skip to content

Latest commit

 

History

History
163 lines (129 loc) · 5.69 KB

pronunciation-dictionaries-add-from-file.mdx

File metadata and controls

163 lines (129 loc) · 5.69 KB
title openapi
Add from file
post /v1/pronunciation-dictionaries/add-from-file

Adding a pronunciation dictionary

Here is some example code for uploading a pronunciation dictionary and printing the `pronunciation_dictionary_id` and `version_id` returned in the response. You will need these identifiers in the request body if you intend to use `pronunciation_dictionary_locators`.

All you will need to do is replace API_KEY_HERE with your actual API key and PATH_HERE with the actual path to the PLS file you want to upload.

If you need help in understanding how to properly format a PLS / pronunciation dictionary, please refer to the guide here.

There is currently no way to fetch or update previously uploaded dictionaries, so you will need to keep track of the identifiers yourself. If you need to update a dictionary, you will have to upload a new one.

import requests
import os

# API credentials and the root URL shared by every Eleven Labs API request
XI_API_KEY = "API_KEY_HERE"
BASE_URL = "https://api.elevenlabs.io/v1"

# Common request headers: authenticate with the API key and ask for a JSON reply
headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}

def upload_pronunciation_dictionary(file_path, name, description):
    """
    Uploads a pronunciation dictionary file to the Eleven Labs API and returns its ID and version ID.

    Parameters:
    - file_path: The local path to the pronunciation dictionary file.
    - name: A name for the pronunciation dictionary.
    - description: A description of the pronunciation dictionary.

    Returns:
    A tuple (pronunciation_dictionary_id, version_id) when the API answers
    with HTTP 200, or (None, None) for any other status code.
    """
    # Construct the URL for adding a pronunciation dictionary from a file
    url = f"{BASE_URL}/pronunciation-dictionaries/add-from-file"

    # Form fields sent alongside the file in the multipart request
    form_fields = {'name': name, 'description': description}

    # Open the file in a context manager so the handle is always closed,
    # even if the request raises; the upload completes inside the block.
    with open(file_path, 'rb') as dictionary_file:
        response = requests.post(
            url,
            headers=headers,
            files={'file': dictionary_file},
            data=form_fields,
        )

    # Anything other than 200 is reported and treated as a failed upload
    if response.status_code != 200:
        print("Error:", response.status_code)
        return None, None

    # Parse the response JSON to get the pronunciation dictionary and version IDs
    response_body = response.json()
    pronunciation_dictionary_id = response_body.get('id')
    version_id = response_body.get('version_id')

    return pronunciation_dictionary_id, version_id

def main():
    """
    Entry point: uploads one pronunciation dictionary and reports the outcome.
    """
    # Placeholder path and metadata -- replace these with your own values
    dictionary_id, dictionary_version = upload_pronunciation_dictionary(
        r"PATH_HERE",
        "Your Pronunciation Dictionary",
        "My custom pronunciation dictionary",
    )

    # If either identifier is missing, the upload is treated as failed
    if not (dictionary_id and dictionary_version):
        print("Failed to upload pronunciation dictionary.")
        return

    print("Pronunciation Dictionary Uploaded Successfully!")
    print("Pronunciation Dictionary ID:", dictionary_id)
    print("Version ID:", dictionary_version)


# Run the upload only when this file is executed directly, not on import
if __name__ == "__main__":
    main()

Using a pronunciation dictionary

Here is some example code showing how to use these identifiers as locators in your text-to-speech call.

import requests

# API credentials, the API root URL, and the voice used for synthesis
XI_API_KEY = "API_KEY_HERE"
BASE_URL = "https://api.elevenlabs.io/v1"
VOICE_ID = "TxGEqnHWrfWFTfGW9XjX"

# Common request headers carrying the API key
headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}

def text_to_speech(text, pronunciation_dictionary_id, version_id):
    """
    Sends a text-to-speech request that references one pronunciation dictionary version.

    Parameters:
    - text: The text to convert to speech.
    - pronunciation_dictionary_id: The ID of the pronunciation dictionary to apply.
    - version_id: The version ID of that pronunciation dictionary.

    Returns:
    Nothing. On an HTTP 200 response the response body is written to
    "output_audio.mp3"; for any other status the status code is printed.
    """
    # Locator identifying which dictionary (and which version) to apply
    locator = {
        "pronunciation_dictionary_id": pronunciation_dictionary_id,
        "version_id": version_id,
    }
    voice_settings = {
        "stability": 0.5,
        "similarity_boost": 0.8,
        "style": 0.0,
        "use_speaker_boost": True,
    }
    request_body = {
        "model_id": "eleven_monolingual_v1",
        "pronunciation_dictionary_locators": [locator],
        "text": text,
        "voice_settings": voice_settings,
    }

    # POST the JSON payload to the text-to-speech endpoint for the configured voice
    response = requests.post(
        f"{BASE_URL}/text-to-speech/{VOICE_ID}",
        json=request_body,
        headers=headers,
    )

    # Report any non-200 status and stop without writing a file
    if response.status_code != 200:
        print("Error:", response.status_code)
        return

    print("Audio file generated successfully.")

    # Save the raw response bytes as the audio file
    with open("output_audio.mp3", "wb") as output_file:
        output_file.write(response.content)

def main():
    """
    Entry point: requests speech for a sample sentence using placeholder dictionary IDs.
    """
    # Replace the placeholder identifiers below with actual values
    text_to_speech(
        "Hello, world! I can now use pronunciation dictionaries.",
        "PD_ID_HERE",
        "VERSION_ID_HERE",
    )


# Run the example only when this file is executed directly, not on import
if __name__ == "__main__":
    main()