-
Notifications
You must be signed in to change notification settings - Fork 4
/
transcribeUtils.py
executable file
·85 lines (72 loc) · 4.11 KB
/
transcribeUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# ==================================================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================
#
# transcribeUtils.py
# by: Rob Dachowski
# For questions or feedback, please contact robdac@amazon.com
#
# Purpose: The program provides a number of utility functions for leveraging the Amazon Transcribe API
#
# Change Log:
# 6/29/2018: Initial version
#
# ==================================================================================
import boto3
import uuid
import requests
# ==================================================================================
# Function: createTranscribeJob
# Purpose: Function to format the input parameters and invoke the Amazon Transcribe service
# Parameters:
# region - the AWS region in which to run AWS services (e.g. "us-east-1")
# bucket - the Amazon S3 bucket name, without a trailing slash (e.g. "mybucket"), found in region that contains the media file for processing.
# mediaFile - the content to process (e.g. "myvideo.mp4")
#
# ==================================================================================
def createTranscribeJob(region, bucket, mediaFile):
    """Start an Amazon Transcribe job for a media file stored in Amazon S3.

    Parameters:
        region    - the AWS region in which to run AWS services (e.g. "us-east-1")
        bucket    - the Amazon S3 bucket name holding the media file; any
                    trailing "/" is ignored
        mediaFile - the content to process (e.g. "myvideo.mp4")

    Returns:
        The response structure returned by start_transcription_job, unchanged.
    """
    # Bind the client to the requested region so the job runs in the same
    # region the media URI points at, instead of whatever the ambient
    # boto3 default happens to be.
    transcribe = boto3.client('transcribe', region_name=region)

    # Set up the full uri for the bucket and media file. Trailing slashes on
    # the bucket are dropped so "mybucket/" does not yield "mybucket//file".
    mediaUri = "https://" + "s3-" + region + ".amazonaws.com/" + bucket.rstrip('/') + '/' + mediaFile

    print("\tCreating Job: " + "transcribe_" + mediaFile + " for " + mediaUri)

    # A uuid is embedded in the job name because the name is meant to be
    # unique; the original code notes the service returns an error otherwise.
    jobName = "transcribe_" + uuid.uuid4().hex + "_" + mediaFile

    # The language and media format are fixed to US English / mp4.
    # To use a custom vocabulary, additionally pass:
    #     Settings={"VocabularyName": "MyVocabulary"}
    response = transcribe.start_transcription_job(
        TranscriptionJobName=jobName,
        LanguageCode="en-US",
        MediaFormat="mp4",
        Media={"MediaFileUri": mediaUri},
    )

    # return the response structure found in the Transcribe Documentation
    return response
# ==================================================================================
# Function: getTranscriptionJobStatus
# Purpose: Helper function to return the status of a job running the Amazon Transcribe service
# Parameters:
# jobName - the unique jobName used to start the Amazon Transcribe job
# ==================================================================================
def getTranscriptionJobStatus(jobName, region=None):
    """Fetch the current description of an Amazon Transcribe job.

    Parameters:
        jobName - the unique jobName used to start the Amazon Transcribe job
        region  - optional AWS region to query (e.g. "us-east-1"). When left
                  as None the client is created without an explicit region,
                  which is what this function did before the parameter existed.

    Returns:
        The response structure returned by get_transcription_job, unchanged.
    """
    # region_name=None lets boto3 resolve the region from its own
    # configuration, so existing single-argument callers are unaffected.
    transcribe = boto3.client('transcribe', region_name=region)
    return transcribe.get_transcription_job(TranscriptionJobName=jobName)
# ==================================================================================
# Function: getTranscript
# Purpose: Helper function to return the transcript based on the signed URI in S3 as produced by the Transcript job
# Parameters:
# transcriptURI - the signed S3 URI for the Transcribe output
# ==================================================================================
def getTranscript(transcriptURI, timeout=60):
    """Download the transcript document produced by a Transcribe job.

    Parameters:
        transcriptURI - the signed S3 URI for the Transcribe output
        timeout       - seconds to wait for the server before giving up
                        (passed straight to requests.get); defaults to 60

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.exceptions.HTTPError - if the server answers with a 4xx/5xx
            status
        requests.exceptions.Timeout   - if the server does not respond within
            the timeout
    """
    # Without a timeout requests waits indefinitely on a stalled connection.
    result = requests.get(transcriptURI, timeout=timeout)

    # Fail loudly on an HTTP error instead of handing the error body back to
    # the caller as though it were the transcript.
    result.raise_for_status()
    return result.text