Skip to content
openlibrary edited this page Aug 13, 2010 · 8 revisions

We will use "SoundManager2":http://www.schillmania.com/projects/soundmanager2/ to play mp3s and ogg files. The API looks like this:


// Register a sound with SoundManager2 under a unique id, pointing at the
// audio file to stream, then start playback on the returned sound object.
var aSoundObject = soundManager.createSound({
    id: "mySound2",
    url: "http://tts.archive.org/foo.mp3"
});

aSoundObject.play();

h2. Pseudocode

Pseudocode for the proposed read-it-to-me feature:


function startSpeaking(startPage) {
    for (page= startPage; page

With prefetching of audio, the code might look like this:


function startSpeaking(startPage) {
    for (page= startPage; page

h2. prefetchAudio()

prefetchAudio() passes a string to a web service to be rendered by a Linux Text-to-Speech engine, and returns immediately.


/**
 * Ask the text-to-speech web service to start rendering a string to audio.
 *
 * Issues a jQuery GET request to renderAudio.py and does not wait for or
 * inspect the response.
 *
 * @param {string} string - Text to render. Nothing is requested when it is
 *     missing or empty.
 * @param {string} format - Audio format to render, "mp3" or "ogg".
 */
BookReader.prototype.prefetchAudio = function(string, format) {
    // There is nothing to render for a missing or empty sentence.
    if (!string) {
        return;
    }

    //TODO: handle errors
    // The text goes in the "string" field because renderAudio.py reads
    // form.getvalue('string'); the caller's text and format are forwarded
    // instead of a hard-coded sample sentence.
    $.get('renderAudio.py', {string: string, format: format});
};

h2. speakAudio()


/**
 * Play the rendered audio for a sentence through SoundManager2.
 *
 * Builds the getAudio.py URL from the MD5 of the text and the requested
 * format, creates a sound for it and starts playback. When playback
 * finishes, playNextSentence() is called.
 *
 * @param {string} string - The sentence whose audio should be played.
 * @param {string} format - Audio format to request, "mp3" or "ogg".
 */
function speakAudio(string, format) {
    var md5 = hex_md5(string);
    var url = 'http://tts.archive.org/getAudio.py?id=' + encodeURIComponent(md5) +
              '&format=' + encodeURIComponent(format);

    //soundmanager2 call to play the mp3/ogg file specified by url
    var mySound = soundManager.createSound({
        // SoundManager2 hands back the existing sound when an id is reused,
        // so a constant id would replay the first sentence forever.
        id: 'tts-' + md5 + '-' + format,
        url: url,
        volume: 50,
        // Must be a function: writing playNextSentence() here would advance
        // to the next sentence immediately instead of when playback ends.
        onfinish: function() {
            playNextSentence();
        }
    });
    mySound.play();
}

h2. startSpeaking


/**
 * Begin reading the book aloud, starting at the given page.
 *
 * Stores the preferred audio format and the page's sentences on `this`,
 * asks the server to pre-render the first few sentences, then starts
 * playback with the first sentence.
 *
 * @param {number} startPage - Index of the page (leaf) to start reading at.
 */
function startSpeaking(startPage) {
    // How many sentences to request from the TTS service before playback.
    var PREFETCH_COUNT = 5;
    var text, prefetchTotal, i;

    this.audioFormat = getPreferredAudioFormat();
    showDialog("starting audio engine...");

    // Split the page into sentences first, so the prefetch loop can use them.
    text = getTextForPage(startPage);
    this.sentences = getSentencesForText(text);

    //prefetch
    // Bounded by the sentence count so short pages do not request undefined.
    prefetchTotal = Math.min(PREFETCH_COUNT, this.sentences.length);
    for (i = 0; i < prefetchTotal; i++) {
        prefetchAudio(this.sentences[i], this.audioFormat);
    }

    // NOTE(review): JavaScript has no built-in blocking sleep(); this relies
    // on a helper defined elsewhere. getAudio.py is documented to wait for
    // rendering, so this head start may be unnecessary - confirm.
    sleep(5);

    this.isSpeaking = true;
    this.speakingSentence = 0;
    // The leaf being read is the page we were asked to start on.
    this.speakingLeaf = startPage;
    playNextSentence();
}

h2. playNextSentence



/**
 * Play the current sentence and advance the reading position.
 *
 * Reads and updates the state kept on `this`: `sentences`,
 * `speakingSentence`, `speakingLeaf`, `numPages` and `audioFormat`.
 * When endofPage() reports the end of the page, moves to the next leaf and
 * loads its sentences, or calls stopSpeaking() after the last leaf. Finally
 * asks the server to pre-render the sentence that will be played next.
 */
function playNextSentence() {
    var text;

    // speakAudio() is the playback routine; it needs the same audio format
    // that startSpeaking() stored in this.audioFormat.
    speakAudio(this.sentences[this.speakingSentence], this.audioFormat);

    this.speakingSentence++;

    if (endofPage()) {
        this.speakingLeaf++;
        if (this.speakingLeaf >= this.numPages) {
            stopSpeaking();
            return;
        }
        this.speakingSentence = 0;
        text = getTextForPage(this.speakingLeaf);
        this.sentences = getSentencesForText(text);
    }

    // speakingSentence now indexes the upcoming sentence, so render it ahead
    // of time.
    prefetchAudio(this.sentences[this.speakingSentence], this.audioFormat);
}

h2. renderAudio.py


#!/usr/bin/python

"""This script takes a string and adds it to the text-to-speech work queue.

It is a CGI script. It reads the `string` and `format` form fields, derives
a job id of the form <md5 hex digest of string>.<format>, and inserts a row
with status 'waiting' into the `queue` table of ttsQueue.sqlite3. A job that
is already in the queue is left untouched.
"""

import sqlite3
import cgi
import hashlib
import sys
import time

# The format becomes part of the job id, which is later used as a file name,
# so only known values are accepted.
ALLOWED_FORMATS = ('mp3', 'ogg')


def respond(status, body):
    """Write a complete plain-text CGI response with the given status line."""
    sys.stdout.write("Status: %s\r\nContent-Type: text/plain\r\n\r\n%s\n" % (status, body))


form = cgi.FieldStorage()

# getfirst() always yields a single value, even if the field is repeated.
# The legacy field name `str` is accepted as well as `string`.
string = form.getfirst('string') or form.getfirst('str')
format = form.getfirst('format')

if not string or format not in ALLOWED_FORMATS:
    respond('400 Bad Request', 'expected a non-empty string and a format of mp3 or ogg')
    sys.exit(0)

# Hash the raw bytes; store the text as unicode so sqlite3 accepts non-ASCII.
data = string if isinstance(string, bytes) else string.encode('utf-8')
text = data.decode('utf-8', 'replace')

m = hashlib.md5()
m.update(data)
jobId = m.hexdigest() + '.' + format

timestamp = int(time.time())
t = (jobId, text, format, 'waiting', timestamp)

conn = sqlite3.connect('ttsQueue.sqlite3')
try:
    c = conn.cursor()
    # jobId is the primary key: requesting the same sentence twice must not
    # fail, and must not reset a job that was already rendered.
    c.execute("""insert or ignore into queue values (?, ?, ?, ?, ?)""", t)
    conn.commit()
    c.close()
finally:
    conn.close()

# Report only the job id; the submitted text is never echoed back.
respond('200 OK', 'queued ' + jobId)

h2. getAudio.py



#!/usr/bin/python

"""
This script issues a 302 redirect to a tts audio file, if it has already been
rendered by ttsworker.py. If the audio file is still processing, this script
will poll the db for the file every five seconds, for up to thirty seconds.
If the audio is still not ready after that (or the job is unknown), it
responds with 404. A malformed job id gets a 400.
"""

import cgi
import re
import sqlite3
import sys
import time

POLL_INTERVAL = 5   # seconds between db polls
MAX_WAIT = 30       # total seconds to wait before giving up

# The worker marks finished jobs 'done'; 'processed' is accepted as well.
READY_STATUSES = ('processed', 'done')

# NOTE(review): assumes the worker's audio directory (/var/www/audio) is
# served at /audio/ - confirm against the web server configuration.
AUDIO_URL_PREFIX = '/audio/'

# Job ids look like <32 hex chars>.<format>. Validating the id also keeps
# request data out of the Location header.
JOB_ID_RE = re.compile(r'^[0-9a-f]{32}\.(mp3|ogg)$')


def respond(status, body, location=None):
    """Write a complete plain-text CGI response, optionally with a Location header."""
    sys.stdout.write("Status: %s\r\n" % status)
    if location is not None:
        sys.stdout.write("Location: %s\r\n" % location)
    sys.stdout.write("Content-Type: text/plain\r\n\r\n%s\n" % body)


def redirect(jobId):
    """Send a 302 redirect to the rendered audio file for jobId."""
    respond('302 Found', 'audio is ready', AUDIO_URL_PREFIX + jobId)


def lookupStatus(cursor, jobId):
    """Return the status column for jobId, or None if the job is not queued."""
    cursor.execute('select status from queue where jobId=?', (jobId,))
    row = cursor.fetchone()
    if row is None:
        return None
    return row[0]


form = cgi.FieldStorage()

# getfirst() returns the plain string value; form['id'] would be a field object.
jobId = form.getfirst('id', '')
format = form.getfirst('format', '')

# The queue keys jobs as <md5>.<format>; accept either the full job id or a
# bare md5 accompanied by a separate format parameter.
if format and not jobId.endswith('.' + format):
    jobId = '%s.%s' % (jobId, format)

if not JOB_ID_RE.match(jobId):
    respond('400 Bad Request', 'invalid job id')
    sys.exit(0)

conn = sqlite3.connect('ttsQueue.sqlite3')
try:
    c = conn.cursor()
    attempts = MAX_WAIT // POLL_INTERVAL + 1
    for i in range(attempts):
        # A missing row is treated like a pending job: the request that
        # queues it may not have been handled yet.
        if lookupStatus(c, jobId) in READY_STATUSES:
            redirect(jobId)
            break
        if i < attempts - 1:
            time.sleep(POLL_INTERVAL)
    else:
        respond('404 Not Found', 'audio not ready')
finally:
    conn.close()

h2. ttsworker.py



"""
Worker loop for the text-to-speech queue.

Repeatedly takes one 'waiting' job from the queue table of ttsQueue.sqlite3,
renders its text to an audio file under audioDir with text2wave and lame,
and records the outcome in the job's status column.
"""

import sqlite3
#from subprocess import Popen, PIPE
import commands
import os
import time
import pipes

audioDir = '/var/www/audio'

conn = sqlite3.connect('ttsQueue.sqlite3')
c = conn.cursor()

def tts(string, format, jobId):
	"""Render string to audioDir/jobId; return True if the pipeline succeeded.

	string -- the text to speak
	format -- requested audio format (currently unused, see todo)
	jobId  -- output file name, hash.mp3 or hash.ogg
	"""
	#todo: deal with ogg format

	# jobId is built from request data; refuse anything that is not a plain
	# file name so the output cannot escape audioDir.
	if not jobId or os.path.basename(jobId) != jobId:
		print('refusing suspicious job id')
		return False

	path = '%s/%s' % (audioDir, jobId) #jobid is hash.mp3 or hash.ogg

	# Both the text and the path are shell-quoted. printf is used instead of
	# echo so text starting with '-' or containing backslashes is passed
	# through unchanged.
	cmd = "printf '%%s\\n' %s | text2wave | lame -f - %s" % (pipes.quote(string), pipes.quote(path))
	if not isinstance(cmd, str):
		# sqlite3 returns unicode text; hand the shell utf-8 bytes
		cmd = cmd.encode('utf-8')
	print(cmd)

	(status, output) = commands.getstatusoutput(cmd)
	if status != 0:
		print('tts failed with status %d: %s' % (status, output))
		return False
	return True

def updateDb(jobId, status='done'):
	"""Set the status column of job jobId (default 'done') and commit."""
	t = (status, jobId )
	c.execute("""update queue set status=? where jobId=?""", t)
	conn.commit()

while True:
	c.execute("select * from queue where status = 'waiting' limit 1")
	row = c.fetchone()
	if row is not None:
		jobId = row[0]
		string = row[1]
		format = row[2]

		print('got job ' + jobId)

		# Only mark the job done when the audio file was actually rendered.
		# A failed job is marked 'error' so it is not picked up again.
		if tts(string, format, jobId):
			updateDb(jobId)
		else:
			updateDb(jobId, 'error')
	else:
		print('waiting for task')
	time.sleep(0.5)

h2. createTTSQueue.py


#!/usr/bin/python

"""
This script creates the ttsQueue.sqlite3 db

The db holds a single table, queue, with one row per text-to-speech job:
jobId (primary key), string, format, status and timestamp. Running the
script again when the table already exists leaves it unchanged.
"""


import sqlite3

conn = sqlite3.connect('ttsQueue.sqlite3')
try:
    c = conn.cursor()

    # "if not exists" makes the script safe to re-run against an existing db.
    c.execute('''create table if not exists queue (jobId text primary key, string text, format text, status text, timestamp integer)''')

    conn.commit()
    c.close()
finally:
    conn.close()

Clone this wiki locally