Skip to content

Commit

Permalink
Added the sample for Word Level Confidence [(#1567)](GoogleCloudPlatform/python-docs-samples#1567)
Browse files Browse the repository at this point in the history

* Added the sample for Word Level Confidence

* Added the extra line

* Added parameter comment

* Removed the line with blank space
  • Loading branch information
happyhuman authored and busunkim96 committed Sep 3, 2020
1 parent fa88ec5 commit 926a711
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 3 deletions.
1 change: 1 addition & 0 deletions google-cloud-speech/samples/snippets/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ To run this sample:
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
python beta_snippets.py multi-language resources/multi.wav en-US es
python beta_snippets.py word-level-conf resources/commercial_mono.wav
positional arguments:
command
Expand Down
42 changes: 40 additions & 2 deletions google-cloud-speech/samples/snippets/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
python beta_snippets.py multi-language resources/multi.wav en-US es
python beta_snippets.py word-level-conf resources/commercial_mono.wav
"""

import argparse
Expand Down Expand Up @@ -240,6 +241,39 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
# [END speech_transcribe_multilanguage]


def transcribe_file_with_word_level_confidence(speech_file):
    """Transcribe the given audio file synchronously with
    word level confidence.

    The whole file is read into memory and sent in a single ``recognize``
    request. The request is hard-coded to LINEAR16 audio at 16000 Hz in
    'en-US' with ``enable_word_confidence=True``.

    For each result that has at least one alternative, the transcript of the
    first alternative is printed. The first word and its confidence are
    printed as well when the alternative carries word information.

    Args:
        speech_file: Path to the audio file to transcribe.
    """
    # [START speech_transcribe_word_level_confidence]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    with open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.types.RecognitionAudio(content=content)

    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_confidence=True)

    response = client.recognize(config, audio)

    for i, result in enumerate(response.results):
        # A result without alternatives has nothing to report; indexing
        # [0] on it would raise IndexError.
        if not result.alternatives:
            continue
        alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(i))
        print(u'Transcript: {}'.format(alternative.transcript))
        # Only report the first word when word info is actually present.
        if alternative.words:
            first_word = alternative.words[0]
            print(u'First Word and Confidence: ({}, {})'.format(
                first_word.word, first_word.confidence))
    # [END speech_transcribe_word_level_confidence]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -248,9 +282,11 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
parser.add_argument(
'path', help='File for audio file to be recognized')
parser.add_argument(
'first', help='First language in audio file to be recognized')
'first', help='First language in audio file to be recognized',
nargs='?')
parser.add_argument(
'second', help='Second language in audio file to be recognized')
'second', help='Second language in audio file to be recognized',
nargs='?')

args = parser.parse_args()

Expand All @@ -266,3 +302,5 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
transcribe_file_with_multichannel(args.path)
elif args.command == 'multi-language':
transcribe_file_with_multilanguage(args.path, args.first, args.second)
elif args.command == 'word-level-conf':
transcribe_file_with_word_level_confidence(args.path)
11 changes: 10 additions & 1 deletion google-cloud-speech/samples/snippets/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
transcribe_file_with_enhanced_model,
transcribe_file_with_metadata,
transcribe_file_with_multichannel,
transcribe_file_with_multilanguage)
transcribe_file_with_multilanguage,
transcribe_file_with_word_level_confidence)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')

Expand Down Expand Up @@ -70,3 +71,11 @@ def test_transcribe_multilanguage_file(capsys):
out, err = capsys.readouterr()

assert 'how are you doing estoy bien e tu' in out


def test_transcribe_word_level_confidence(capsys):
    """Run the word-level-confidence sample on Google_Gnome.wav and check
    that the expected transcript appears in the captured stdout."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_file_with_word_level_confidence(audio_path)

    # Only stdout is inspected; the captured stderr is discarded.
    captured_out, _ = capsys.readouterr()

    expected = 'OK Google stream stranger things from Netflix to my TV'
    assert expected in captured_out

0 comments on commit 926a711

Please sign in to comment.