Skip to content

Commit

Permalink
samples: Code samples for Speech-to-Text V2 (#460)
Browse files Browse the repository at this point in the history
Co-authored-by: Nitsan Shai <nshai@google.com>
  • Loading branch information
2 people authored and telpirion committed Mar 13, 2023
1 parent 2896c00 commit 9ea65f1
Show file tree
Hide file tree
Showing 17 changed files with 882 additions and 4 deletions.
92 changes: 92 additions & 0 deletions speech/snippets/adaptation_v2_custom_class_reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2022 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# [START speech_adaptation_v2_custom_class_reference]
import io

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech


def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_id, custom_class_id, audio_file):
    """Transcribe audio with a PhraseSet that references a stored CustomClass.

    Creates a Recognizer, a CustomClass and a PhraseSet under
    ``projects/{project_id}/locations/global``, then sends a single
    recognition request for the audio bytes with the PhraseSet attached as
    speech adaptation. The created CustomClass, PhraseSet and the recognition
    config are printed, followed by one transcript line per result.

    Args:
        project_id: Project used to build the parent resource path.
        recognizer_id: ID passed to the create-recognizer request.
        phrase_set_id: ID passed to the create-phrase-set request.
        custom_class_id: ID passed to the create-custom-class request.
        audio_file: Path of the audio file; it is read in binary mode.

    Returns:
        The response returned by ``client.recognize``.
    """
    # Instantiates a client
    speech_client = SpeechClient()

    # Every resource created below shares this parent path.
    parent = f"projects/{project_id}/locations/global"

    # Creates a Recognizer
    recognizer_spec = cloud_speech.Recognizer(
        language_codes=["en-US"], model="latest_short"
    )
    create_recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=parent,
        recognizer_id=recognizer_id,
        recognizer=recognizer_spec,
    )
    recognizer = speech_client.create_recognizer(
        request=create_recognizer_request
    ).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as audio:
        audio_bytes = audio.read()

    # Create a persistent CustomClass to reference in phrases
    keem_class = cloud_speech.CustomClass(items=[{"value": "Keem"}])
    create_class_request = cloud_speech.CreateCustomClassRequest(
        parent=parent,
        custom_class_id=custom_class_id,
        custom_class=keem_class,
    )
    custom_class = speech_client.create_custom_class(
        request=create_class_request
    ).result()

    # Create a persistent PhraseSet to reference in a recognition request.
    # The phrase value is "${<custom class resource name>}".
    class_reference = f"${{{custom_class.name}}}"
    boosted_phrases = [{"value": class_reference, "boost": 20}]
    create_phrase_set_request = cloud_speech.CreatePhraseSetRequest(
        parent=parent,
        phrase_set_id=phrase_set_id,
        phrase_set=cloud_speech.PhraseSet(phrases=boosted_phrases),
    )
    phrase_set = speech_client.create_phrase_set(
        request=create_phrase_set_request
    ).result()

    # Add a reference of the PhraseSet into the recognition request
    phrase_set_ref = cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
        phrase_set=phrase_set.name
    )
    adaptation = cloud_speech.SpeechAdaptation(phrase_sets=[phrase_set_ref])
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    for created in (custom_class, phrase_set, config):
        print(created)

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=audio_bytes
    )

    # Transcribes the audio into text
    response = speech_client.recognize(request=recognize_request)

    for result in response.results:
        top_alternative = result.alternatives[0]
        print("Transcript: {}".format(top_alternative.transcript))

    return response
# [END speech_adaptation_v2_custom_class_reference]


if __name__ == "__main__":
    # The sample function has five required positional parameters, so calling
    # it with no arguments raises TypeError. Read them from the command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using a PhraseSet that references a CustomClass."
    )
    parser.add_argument("project_id", help="Project used for the parent resource path")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("phrase_set_id", help="ID for the PhraseSet to create")
    parser.add_argument("custom_class_id", help="ID for the CustomClass to create")
    parser.add_argument("audio_file", help="Path of the audio file to transcribe")
    args = parser.parse_args()

    adaptation_v2_custom_class_reference(
        args.project_id,
        args.recognizer_id,
        args.phrase_set_id,
        args.custom_class_id,
        args.audio_file,
    )
70 changes: 70 additions & 0 deletions speech/snippets/adaptation_v2_custom_class_reference_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2022, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
from uuid import uuid4

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

import adaptation_v2_custom_class_reference

RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a delete request for the Recognizer with resource name ``name``.

    A new client is created for the call and the value returned by
    ``delete_recognizer`` is discarded.
    """
    SpeechClient().delete_recognizer(
        request=cloud_speech.DeleteRecognizerRequest(name=name)
    )


def delete_phrase_set(name):
    """Send a delete request for the PhraseSet with resource name ``name``.

    A new client is created for the call and the value returned by
    ``delete_phrase_set`` is discarded.
    """
    SpeechClient().delete_phrase_set(
        request=cloud_speech.DeletePhraseSetRequest(name=name)
    )


def delete_custom_class(name):
    """Send a delete request for the CustomClass with resource name ``name``.

    A new client is created for the call and the value returned by
    ``delete_custom_class`` is discarded.
    """
    SpeechClient().delete_custom_class(
        request=cloud_speech.DeleteCustomClassRequest(name=name)
    )


def test_adaptation_v2_custom_class_reference(capsys):
    """Run the custom-class-reference sample and check the transcript.

    Uses randomly suffixed resource IDs under the project named by the
    ``GOOGLE_CLOUD_PROJECT`` environment variable, asserts that the first
    transcript contains "play Baby Keem" (case-insensitive), and deletes the
    created Recognizer, PhraseSet and CustomClass afterwards.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

    recognizer_id = "recognizer-" + str(uuid4())
    phrase_set_id = "phrase-set-" + str(uuid4())
    custom_class_id = "custom-class-" + str(uuid4())
    response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
        project_id, recognizer_id, phrase_set_id, custom_class_id, os.path.join(RESOURCES, "baby_keem.wav")
    )

    try:
        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Clean up in a finally block so a failed assertion (or an empty
        # result list) does not leave the created resources behind.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )

        delete_phrase_set(
            f"projects/{project_id}/locations/global/phraseSets/{phrase_set_id}"
        )

        delete_custom_class(
            f"projects/{project_id}/locations/global/customClasses/{custom_class_id}"
        )
73 changes: 73 additions & 0 deletions speech/snippets/adaptation_v2_inline_custom_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright 2022 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# [START speech_adaptation_v2_inline_custom_class]
import io

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech


def adaptation_v2_inline_custom_class(project_id, recognizer_id, audio_file):
    """Transcribe audio with an inline PhraseSet and an inline CustomClass.

    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
    sends a single recognition request whose adaptation carries an inline
    PhraseSet (phrase ``${keem}``) together with the ``keem`` CustomClass.
    One transcript line is printed per result.

    Args:
        project_id: Project used to build the parent resource path.
        recognizer_id: ID passed to the create-recognizer request.
        audio_file: Path of the audio file; it is read in binary mode.

    Returns:
        The response returned by ``client.recognize``.
    """
    # Instantiates a client
    speech_client = SpeechClient()

    # Creates a Recognizer
    recognizer_spec = cloud_speech.Recognizer(
        language_codes=["en-US"], model="latest_short"
    )
    create_recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=recognizer_spec,
    )
    recognizer = speech_client.create_recognizer(
        request=create_recognizer_request
    ).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as audio:
        audio_bytes = audio.read()

    # Build inline phrase set to produce a more accurate transcript
    inline_phrases = cloud_speech.PhraseSet(
        phrases=[{"value": "${keem}", "boost": 20}]
    )
    keem_class = cloud_speech.CustomClass(name="keem", items=[{"value": "Keem"}])
    inline_ref = cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
        inline_phrase_set=inline_phrases
    )
    adaptation = cloud_speech.SpeechAdaptation(
        phrase_sets=[inline_ref],
        custom_classes=[keem_class],
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=audio_bytes
    )

    # Transcribes the audio into text
    response = speech_client.recognize(request=recognize_request)

    for result in response.results:
        top_alternative = result.alternatives[0]
        print("Transcript: {}".format(top_alternative.transcript))

    return response
# [END speech_adaptation_v2_inline_custom_class]


if __name__ == "__main__":
    # The sample function has three required positional parameters, so calling
    # it with no arguments raises TypeError. Read them from the command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using an inline PhraseSet and CustomClass."
    )
    parser.add_argument("project_id", help="Project used for the parent resource path")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path of the audio file to transcribe")
    args = parser.parse_args()

    adaptation_v2_inline_custom_class(
        args.project_id, args.recognizer_id, args.audio_file
    )
48 changes: 48 additions & 0 deletions speech/snippets/adaptation_v2_inline_custom_class_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright 2022, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
from uuid import uuid4

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

import adaptation_v2_inline_custom_class

RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a delete request for the Recognizer with resource name ``name``.

    A new client is created for the call and the value returned by
    ``delete_recognizer`` is discarded.
    """
    SpeechClient().delete_recognizer(
        request=cloud_speech.DeleteRecognizerRequest(name=name)
    )


def test_adaptation_v2_inline_custom_class(capsys):
    """Run the inline-custom-class sample and check the transcript.

    Uses a randomly suffixed Recognizer ID under the project named by the
    ``GOOGLE_CLOUD_PROJECT`` environment variable, asserts that the first
    transcript contains "play Baby Keem" (case-insensitive), and deletes the
    created Recognizer afterwards.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

    recognizer_id = "recognizer-" + str(uuid4())
    response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
        project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
    )

    try:
        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Clean up in a finally block so a failed assertion (or an empty
        # result list) does not leave the Recognizer behind.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
71 changes: 71 additions & 0 deletions speech/snippets/adaptation_v2_inline_phrase_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright 2022 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# [START speech_adaptation_v2_inline_phrase_set]
import io

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech


def adaptation_v2_inline_phrase_set(project_id, recognizer_id, audio_file):
    """Transcribe audio with an inline PhraseSet as speech adaptation.

    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
    sends a single recognition request whose adaptation carries an inline
    PhraseSet containing the phrase "Keem" with boost 10. One transcript line
    is printed per result.

    Args:
        project_id: Project used to build the parent resource path.
        recognizer_id: ID passed to the create-recognizer request.
        audio_file: Path of the audio file; it is read in binary mode.

    Returns:
        The response returned by ``client.recognize``.
    """
    # Instantiates a client
    speech_client = SpeechClient()

    # Creates a Recognizer
    recognizer_spec = cloud_speech.Recognizer(
        language_codes=["en-US"], model="latest_short"
    )
    create_recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=recognizer_spec,
    )
    recognizer = speech_client.create_recognizer(
        request=create_recognizer_request
    ).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as audio:
        audio_bytes = audio.read()

    # Build inline phrase set to produce a more accurate transcript
    inline_phrases = cloud_speech.PhraseSet(
        phrases=[{"value": "Keem", "boost": 10}]
    )
    inline_ref = cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
        inline_phrase_set=inline_phrases
    )
    adaptation = cloud_speech.SpeechAdaptation(phrase_sets=[inline_ref])
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=audio_bytes
    )

    # Transcribes the audio into text
    response = speech_client.recognize(request=recognize_request)

    for result in response.results:
        top_alternative = result.alternatives[0]
        print("Transcript: {}".format(top_alternative.transcript))

    return response
# [END speech_adaptation_v2_inline_phrase_set]


if __name__ == "__main__":
    # The sample function has three required positional parameters, so calling
    # it with no arguments raises TypeError. Read them from the command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using an inline PhraseSet."
    )
    parser.add_argument("project_id", help="Project used for the parent resource path")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path of the audio file to transcribe")
    args = parser.parse_args()

    adaptation_v2_inline_phrase_set(
        args.project_id, args.recognizer_id, args.audio_file
    )
Loading

0 comments on commit 9ea65f1

Please sign in to comment.