-
Notifications
You must be signed in to change notification settings - Fork 18
/
asr_speechmatics.py
215 lines (181 loc) · 8.82 KB
/
asr_speechmatics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
"""
Script for integrating with the Speechmatics API.
Most of the code comes from https://github.com/speechmatics/speechmatics_python
"""
import codecs
import json
import logging
import time
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import requests
import pprint
class SpeechmaticsError(Exception):
    """
    Raised for failures that are specific to Speechmatics systems and pipelines.

    Attributes:
        msg: the error text; also what ``str()`` of the exception returns.
        returncode: numeric code carried with the error (1 unless given).
    """

    def __init__(self, msg, returncode=1):
        # Initialise the base class directly so ``args`` holds the message.
        Exception.__init__(self, msg)
        self.returncode = returncode
        self.msg = msg

    def __str__(self):
        text = self.msg
        return text
class SpeechmaticsClient(object):
    """
    A simple client to interact with the Speechmatics REST API
    Documentation at https://app.speechmatics.com/api-details

    Every request is authenticated by sending the API token as the
    ``auth_token`` query parameter.
    """

    # Troubleshooting text appended to the error raised by a failed job POST,
    # keyed by the HTTP status code of the response.
    _POST_ERROR_HINTS = {
        400: ("Common causes of this error are:\n"
              "Malformed arguments\n"
              "Missing data file\n"
              "Absent / unsupported language selection."),
        401: ("Common causes of this error are:\n"
              "Invalid user id or authentication token."),
        403: ("Common causes of this error are:\n"
              "Insufficient credit\n"
              "User id not in our database\n"
              "Incorrect authentication token."),
        429: ("Common causes of this error are:\n"
              "You are submitting too many POSTs in a short period of time."),
        503: ("Common causes of this error are:\n"
              "The system is temporarily unavailable or overloaded.\n"
              "Your POST will typically succeed if you try again soon."),
    }

    # Closing line of every error message; ``{}`` is the HTTP verb that failed.
    _SUPPORT_HINT = ("If you are still unsure why your {} failed please contact speechmatics: "
                     "support@speechmatics.com")

    def __init__(self, api_user_id, api_token, base_url='https://api.speechmatics.com/v1.0'):
        """
        Store the credentials and the API root used to build request URLs.

        ``api_user_id`` is joined into URLs as-is, so it must be a string.
        """
        self.api_user_id = api_user_id
        self.api_token = api_token
        self.base_url = base_url

    def _jobs_url(self, *parts):
        """Return the URL of this user's jobs collection followed by ``parts``."""
        return "".join([self.base_url, '/user/', self.api_user_id, '/jobs/'] + list(parts))

    def job_post(self, audio_file, lang, text_file=None):
        """
        Upload a new audio file to speechmatics for transcription.
        If a text file is specified, upload that as well for an alignment job.

        Args:
            audio_file: path of the audio file to upload.
            lang: value sent as the ``model`` form field.
            text_file: optional path of a text file to upload with the audio.

        Returns:
            The ``id`` field of the JSON response (the id of the new job).

        Raises:
            IOError: if either file cannot be opened.
            SpeechmaticsError: if the response status code is not 200.
        """
        url = self._jobs_url()
        params = {'auth_token': self.api_token}
        data = {"model": lang}
        files = {}
        try:
            try:
                files['data_file'] = open(audio_file, "rb")
            except IOError:
                logging.error("Problem opening audio file {}".format(audio_file))
                raise
            if text_file:
                try:
                    files['text_file'] = open(text_file, "rb")
                except IOError:
                    logging.error("Problem opening text file {}".format(text_file))
                    raise
            request = requests.post(url, data=data, files=files, params=params)
        finally:
            # Close whatever was opened, whether the upload succeeded, the
            # request raised, or the second file could not be opened.
            for handle in files.values():
                handle.close()

        if request.status_code == 200:
            return json.loads(request.text)['id']

        err_msg = "Attempt to POST job failed with code {}\n".format(request.status_code)
        err_msg += self._POST_ERROR_HINTS.get(request.status_code, "")
        err_msg += "\n" + self._SUPPORT_HINT.format("POST")
        raise SpeechmaticsError(err_msg)

    def job_details(self, job_id):
        """
        Checks on the status of the given job.

        Returns:
            The ``job`` entry of the JSON response (a dictionary of job details).

        Raises:
            SpeechmaticsError: if the response status code is not 200.
        """
        params = {'auth_token': self.api_token}
        url = self._jobs_url(str(job_id), '/')
        request = requests.get(url, params=params)
        if request.status_code == 200:
            return json.loads(request.text)['job']

        err_msg = ("Attempt to GET job details failed with code {}\n".format(request.status_code)
                   + self._SUPPORT_HINT.format("GET"))
        raise SpeechmaticsError(err_msg)

    def get_output(self, job_id, frmat, job_type):
        """
        Downloads the output of the given job.

        Args:
            job_id: id of the job whose output is wanted.
            frmat: truthy to ask for the alternate output format.
            job_type: final URL segment naming the output to fetch; it also
                selects which alternate-format parameter ``frmat`` adds.

        Returns:
            The response body as text.

        Raises:
            SpeechmaticsError: if the response status code is not 200.
        """
        params = {'auth_token': self.api_token}
        # NOTE(review): the caller in this file passes 'transcript' (not
        # 'transcription') as job_type, so this branch never fires for it and
        # it always gets the default format. Confirm the intended value before
        # changing the comparison, since that would change what callers receive.
        if frmat and job_type == 'transcription':
            params['format'] = 'txt'
        if frmat and job_type == 'alignment':
            params['tags'] = 'one_per_line'
        url = self._jobs_url(str(job_id), '/', job_type)
        request = requests.get(url, params=params)
        if request.status_code == 200:
            return request.text

        err_msg = ("Attempt to GET job output failed with code {}\n".format(request.status_code)
                   + self._SUPPORT_HINT.format("GET"))
        raise SpeechmaticsError(err_msg)
def parse_args():
    """
    Build the command line interface and parse the process arguments.

    Returns the argparse namespace with the attributes ``audio``, ``text``,
    ``output``, ``id``, ``token``, ``lang`` and ``format``.
    """
    cli = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Processes a job through the Speechmatics API',
    )

    # (short flag, long flag, required?, help text) for each string-valued
    # option, listed in the order they should appear in the help output.
    string_options = (
        ('-a', '--audio', True,
         "Audio file to be processed"),
        ('-t', '--text', False,
         "Text file to be processed (only required for alignment jobs)"),
        ('-o', '--output', False,
         "Output filename (will print to terminal if not specified)"),
        ('-i', '--id', True,
         "Your Speechmatics user_id"),
        ('-k', '--token', True,
         "Your Speechmatics API Authentication Token"),
        ('-l', '--lang', True,
         "Code of language to use (e.g., en-US)"),
    )
    for short_flag, long_flag, is_required, description in string_options:
        cli.add_argument(short_flag, long_flag, type=str, required=is_required,
                         default=None, help=description)

    # The only boolean switch: present means "use the alternate format".
    alternate_format_help = (
        "Return results in alternate format.\n"
        "Default for transcription is json, alternate is text.\n"
        "Default for alignment is one timing per word, alternate is one per line"
    )
    cli.add_argument('-f', '--format', action='store_true', required=False,
                     help=alternate_format_help)

    parsed = cli.parse_args()
    return parsed
def transcribe_speechmatics(speechmatics_id, speechmatics_token, speech_filepath, language):
    """
    Transcribe an audio file through the Speechmatics API.

    Posts the audio file as a new job, polls the job details until a terminal
    status is reported, downloads the job output and, for transcription jobs,
    joins the recognised words into a single string.

    Args:
        speechmatics_id: Speechmatics user id.
        speechmatics_token: Speechmatics API authentication token.
        speech_filepath: path of the audio file to upload.
        language: language value submitted with the job.

    Returns:
        A ``(predicted_transcription, output)`` tuple. ``output`` is the raw
        text returned by the API. ``predicted_transcription`` is the
        space-joined ``name`` of every entry in the output's ``words`` list,
        or an empty string when the job is not a transcription job.

    Raises:
        SpeechmaticsError: if the job ends as ``unsupported_file_format`` or
            ``could_not_align``, or reports a job type this function does not
            know how to download.
    """
    logging.basicConfig(level=logging.INFO)

    client = SpeechmaticsClient(speechmatics_id, speechmatics_token)
    # Only audio is uploaded here; no text file is supplied with the job.
    job_id = client.job_post(speech_filepath, language)
    logging.info("Your job has started with ID {}".format(job_id))

    # Statuses at which polling stops.
    # NOTE(review): 'expired' ends the loop but is not treated as an error
    # below, so the output download is still attempted - confirm intended.
    terminal_statuses = ('done', 'expired', 'unsupported_file_format', 'could_not_align')
    details = client.job_details(job_id)
    while details['job_status'] not in terminal_statuses:
        wait_s = details['check_wait']
        logging.info("Waiting for job to be processed. Will check again in {} seconds".format(wait_s))
        time.sleep(wait_s)
        details = client.job_details(job_id)

    if details['job_status'] == 'unsupported_file_format':
        raise SpeechmaticsError("File was in an unsupported file format and could not be transcribed. "
                                "You have been reimbursed all credits for this job.")
    if details['job_status'] == 'could_not_align':
        raise SpeechmaticsError("Could not align text and audio file. "
                                "You have been reimbursed all credits for this job.")

    logging.info("Processing complete, getting output")

    # Map the reported job type to the name of the output to download.
    output_kinds = {'transcription': 'transcript', 'alignment': 'alignment'}
    reported_type = details['job_type']
    job_type = output_kinds.get(reported_type)
    if job_type is None:
        # Previously an unknown type surfaced as a NameError further down.
        raise SpeechmaticsError("Unexpected job type {!r} for job {}".format(reported_type, job_id))

    # Ask for the default output format: the words are parsed from JSON below.
    # The builtin ``format`` function used to be passed here by mistake, which
    # is always truthy. For transcripts the request is unchanged; an alignment
    # job now gets the default layout instead of one_per_line.
    use_alternate_format = False
    output = client.get_output(job_id, use_alternate_format, job_type)
    logging.info("Your job output:")

    predicted_transcription = ''
    if job_type == 'transcript':
        print(json.dumps(output, indent=4))
        output_dictionary = json.loads(output)
        pprint.pprint(output_dictionary)
        predicted_transcription = ' '.join(
            word['name'] for word in output_dictionary['words']
        ).strip()
    return predicted_transcription, output