Skip to content

Commit

Permalink
support playht3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
xquanluu committed Sep 27, 2024
1 parent 1846203 commit 1a04fd7
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 10 deletions.
35 changes: 32 additions & 3 deletions lib/synth-audio.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ const {
createKryptonClient,
createRivaClient,
noopLogger,
makeFilePath
makeFilePath,
makePlayhtKey
} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const getVerbioAccessToken = require('./get-verbio-token');
Expand Down Expand Up @@ -244,7 +245,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
});
break;
case 'playht':
audioBuffer = await synthPlayHT(logger, {
audioBuffer = await synthPlayHT(client, logger, {
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
});
break;
Expand Down Expand Up @@ -755,12 +756,38 @@ const synthElevenlabs = async(logger, {
}
};

const synthPlayHT = async(logger, {
const synthPlayHT = async(client, logger, {
credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
}) => {
const {api_key, user_id, voice_engine, options: credOpts} = credentials;
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');

let synthesizeUrl = 'https://api.play.ht/api/v2/tts/stream';

// If the model is Play3.0, the stream URL is provided by the v3 auth endpoint
// and includes a JWT token as a request parameter.
if (voice_engine === 'Play3.0') {
try {
const post = bent('https://api.play.ht', 'POST', 'json', 201, {
'AUTHORIZATION': api_key,
'X-USER-ID': user_id,
'Accept': 'application/json'
});
const key = makePlayhtKey(api_key);
const url = await client.get(key);
if (!url) {
const {inference_address, expires_at_ms} = await post('/api/v3/auth');
synthesizeUrl = inference_address;
const expiry = Math.floor((expires_at_ms - Date.now()) / 1000 - 30);
await client.set(key, inference_address, 'EX', expiry);
}
} catch (err) {
logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
throw err;
}
}

/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
let params = '';
Expand All @@ -769,6 +796,7 @@ const synthPlayHT = async(logger, {
params += ',vendor=playht';
params += `,voice=${voice}`;
params += `,voice_engine=${voice_engine}`;
params += `,synthesize_url=${synthesizeUrl}`;
params += ',write_cache_file=1';
if (opts.quality) params += `,quality=${opts.quality}`;
if (opts.speed) params += `,speed=${opts.speed}`;
Expand All @@ -794,6 +822,7 @@ const synthPlayHT = async(logger, {
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
});

const mp3 = await post('/api/v2/tts/stream', {
text,
voice,
Expand Down
6 changes: 6 additions & 0 deletions lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ function makeAwsKey(awsAccessKeyId) {
return `aws:${hash.digest('hex')}`;
}

/**
 * Derive a cache key from a PlayHT API key without storing the raw
 * credential in the key itself.
 *
 * @param {string} apiKey - the PlayHT API key to fingerprint
 * @returns {string} the literal prefix `playht:` followed by the hex-encoded
 *   SHA-1 digest of `apiKey`
 */
function makePlayhtKey(apiKey) {
  const digest = crypto.createHash('sha1').update(apiKey).digest('hex');
  return `playht:${digest}`;
}
function makeVerbioKey(client_id) {
const hash = crypto.createHash('sha1');
hash.update(client_id);
Expand Down Expand Up @@ -171,6 +176,7 @@ module.exports = {
makeSynthKey,
makeNuanceKey,
makeIbmKey,
makePlayhtKey,
makeAwsKey,
makeVerbioKey,
getNuanceAccessToken,
Expand Down
22 changes: 15 additions & 7 deletions test/synth.js
Original file line number Diff line number Diff line change
Expand Up @@ -574,30 +574,30 @@ test('Elevenlabs speech synth tests', async(t) => {
t.end(err);
}
client.quit();
})
});

test('PlayHT speech synth tests', async(t) => {
const testPlayHT = async(t, voice_engine) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);

if (!process.env.PLAYHT_API_KEY || !process.env.PLAYHT_USER_ID) {
t.pass('skipping PlayHT speech synth tests since PLAYHT_API_KEY or PLAYHT_USER_ID is/are not provided');
return t.end();
}
const text = 'Hi there and welcome to jambones!';
const text = 'Hi there and welcome to jambones! ' + Date.now();
try {
let opts = await synthAudio(stats, {
const opts = await synthAudio(stats, {
vendor: 'playht',
credentials: {
api_key: process.env.PLAYHT_API_KEY,
user_id: process.env.PLAYHT_USER_ID,
voice_engine: 'PlayHT2.0-turbo',
voice_engine,
options: JSON.stringify({
quality: "medium",
quality: 'medium',
speed: 1,
seed: 1,
temperature: 1,
emotion: "female_happy",
emotion: 'female_happy',
voice_guidance: 3,
style_guidance: 20,
text_guidance: 1,
Expand All @@ -615,6 +615,14 @@ test('PlayHT speech synth tests', async(t) => {
t.end(err);
}
client.quit();
};

// Register one PlayHT test case per voice engine; registration order is
// preserved (2.0-turbo first, then 3.0).
const playhtTestCases = [
  ['PlayHT speech synth tests', 'PlayHT2.0-turbo'],
  ['PlayHT3.0 speech synth tests', 'Play3.0']
];

for (const [title, voiceEngine] of playhtTestCases) {
  test(title, async(t) => {
    await testPlayHT(t, voiceEngine);
  });
}

test('rimelabs speech synth tests', async(t) => {
Expand Down

0 comments on commit 1a04fd7

Please sign in to comment.