Skip to content

Commit

Permalink
Update enhanced models and auto punctuation to GA [(#1702)](GoogleClo…
Browse files Browse the repository at this point in the history
…udPlatform/python-docs-samples#1702)

* Update enhanced models and auto punctuation to GA

* Update model-selection to GA
  • Loading branch information
nnegrey authored and busunkim96 committed Sep 3, 2020
1 parent 53feb06 commit 1138817
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 7 deletions.
62 changes: 62 additions & 0 deletions packages/google-cloud-python-speech/samples/snippets/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,68 @@ To run this sample:
Transcribe Enhanced Models
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. image:: https://gstatic.com/cloudssh/images/open-btn.png
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_enhanced_model.py,speech/cloud-client/README.rst




To run this sample:

.. code-block:: bash
$ python transcribe_enhanced_model.py
usage: transcribe_enhanced_model.py [-h] path
Google Cloud Speech API sample that demonstrates enhanced models
and recognition metadata.
Example usage:
python transcribe_enhanced_model.py resources/commercial_mono.wav
positional arguments:
path File to stream to the API
optional arguments:
-h, --help show this help message and exit
Transcribe Automatic Punctuation
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. image:: https://gstatic.com/cloudssh/images/open-btn.png
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_auto_punctuation.py,speech/cloud-client/README.rst




To run this sample:

.. code-block:: bash
$ python transcribe_auto_punctuation.py
usage: transcribe_auto_punctuation.py [-h] path
Google Cloud Speech API sample that demonstrates auto punctuation
and recognition metadata.
Example usage:
python transcribe_auto_punctuation.py resources/commercial_mono.wav
positional arguments:
path File to stream to the API
optional arguments:
-h, --help show this help message and exit
Beta Samples
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ samples:
- name: Transcribe Streaming
file: transcribe_streaming.py
show_help: true
- name: Transcribe Enhanced Models
file: transcribe_enhanced_model.py
show_help: true
- name: Transcribe Automatic Punctuation
file: transcribe_auto_punctuation.py
show_help: true
- name: Beta Samples
file: beta_snippets.py
show_help: true
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
google-cloud-speech==0.35.0
google-cloud-speech==0.36.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env python

# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Speech API sample that demonstrates auto punctuation
and recognition metadata.
Example usage:
python transcribe_auto_punctuation.py resources/commercial_mono.wav
"""

import argparse
import io


def transcribe_file_with_auto_punctuation(path):
    """Recognize a local audio file and print punctuated transcripts.

    The file at ``path`` is read fully into memory as bytes and submitted in
    a single ``client.recognize`` call whose config turns on
    ``enable_automatic_punctuation``. For every result in the response, the
    first alternative's transcript is printed.
    """
    # [START speech_transcribe_auto_punctuation]
    from google.cloud import speech
    client = speech.SpeechClient()

    # path = 'resources/commercial_mono.wav'
    with io.open(path, 'rb') as source:
        payload = source.read()

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    audio = speech.types.RecognitionAudio(content=payload)
    config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=8000,
        language_code='en-US',
        # Enable automatic punctuation
        enable_automatic_punctuation=True)

    response = client.recognize(config, audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_choice = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(top_choice.transcript))
    # [END speech_transcribe_auto_punctuation]


if __name__ == '__main__':
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks as written.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('path', help='File to stream to the API')

    options = cli.parse_args()

    transcribe_file_with_auto_punctuation(options.path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2018, Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import transcribe_auto_punctuation

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')


def test_transcribe_file_with_auto_punctuation(capsys):
    """Run the auto punctuation sample on the bundled clip and check output.

    The audio path is built from RESOURCES (the ``resources`` directory next
    to this test file) rather than a path relative to the current working
    directory, so the test behaves the same wherever pytest is invoked from.
    """
    transcribe_auto_punctuation.transcribe_file_with_auto_punctuation(
        os.path.join(RESOURCES, 'commercial_mono.wav'))
    out, _ = capsys.readouterr()

    # The expected phrase includes periods, so it only matches when the
    # printed transcript is punctuated.
    assert 'Okay. Sure.' in out
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python

# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Speech API sample that demonstrates enhanced models
and recognition metadata.
Example usage:
python transcribe_enhanced_model.py resources/commercial_mono.wav
"""

import argparse
import io


def transcribe_file_with_enhanced_model(path):
    """Recognize a local audio file with an enhanced model and print it.

    The file at ``path`` is read fully into memory as bytes and submitted in
    a single ``client.recognize`` call whose config sets ``use_enhanced`` and
    selects the ``phone_call`` model. For every result in the response, the
    first alternative's transcript is printed.
    """
    # [START speech_transcribe_enhanced_model]
    from google.cloud import speech
    client = speech.SpeechClient()

    # path = 'resources/commercial_mono.wav'
    with io.open(path, 'rb') as source:
        payload = source.read()

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    audio = speech.types.RecognitionAudio(content=payload)
    config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=8000,
        language_code='en-US',
        # Enhanced models are only available to projects that
        # opt in for audio data collection.
        use_enhanced=True,
        # A model must be specified to use enhanced model.
        model='phone_call')

    response = client.recognize(config, audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_choice = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(top_choice.transcript))
    # [END speech_transcribe_enhanced_model]


if __name__ == '__main__':
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks as written.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('path', help='File to stream to the API')

    options = cli.parse_args()

    transcribe_file_with_enhanced_model(options.path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2018, Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import transcribe_enhanced_model

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')


def test_transcribe_file_with_enhanced_model(capsys):
    """Run the enhanced model sample on the bundled clip and check output.

    The audio path is built from RESOURCES (the ``resources`` directory next
    to this test file) rather than a path relative to the current working
    directory, so the test behaves the same wherever pytest is invoked from.
    """
    transcribe_enhanced_model.transcribe_file_with_enhanced_model(
        os.path.join(RESOURCES, 'commercial_mono.wav'))
    out, _ = capsys.readouterr()

    # The printed transcript is expected to contain this word.
    assert 'Chrome' in out
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
import argparse


# [START speech_transcribe_model_selection_beta]
# [START speech_transcribe_model_selection]
def transcribe_model_selection(speech_file, model):
"""Transcribe the given audio file synchronously with
the selected model."""
from google.cloud import speech_v1p1beta1 as speech
from google.cloud import speech
client = speech.SpeechClient()

with open(speech_file, 'rb') as audio_file:
Expand All @@ -52,14 +52,14 @@ def transcribe_model_selection(speech_file, model):
print('-' * 20)
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_model_selection_beta]
# [END speech_transcribe_model_selection]


# [START speech_transcribe_model_selection_gcs_beta]
# [START speech_transcribe_model_selection_gcs]
def transcribe_model_selection_gcs(gcs_uri, model):
"""Transcribe the given audio file asynchronously with
the selected model."""
from google.cloud import speech_v1p1beta1 as speech
from google.cloud import speech
client = speech.SpeechClient()

audio = speech.types.RecognitionAudio(uri=gcs_uri)
Expand All @@ -80,7 +80,7 @@ def transcribe_model_selection_gcs(gcs_uri, model):
print('-' * 20)
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_model_selection_gcs_beta]
# [END speech_transcribe_model_selection_gcs]


if __name__ == '__main__':
Expand Down

0 comments on commit 1138817

Please sign in to comment.