samples: Code samples for Speech-to-Text V2 (#460)

Co-authored-by: Nitsan Shai <nshai@google.com>
GoogleCloudPlatform · Mar 13, 2023 · 9ea65f1 · 9ea65f1
1 parent 2896c00
commit 9ea65f1
Show file tree

Hide file tree

Showing 17 changed files with 882 additions and 4 deletions.
diff --git a/speech/snippets/adaptation_v2_custom_class_reference.py b/speech/snippets/adaptation_v2_custom_class_reference.py
@@ -0,0 +1,92 @@
+# Copyright 2022 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# [START speech_adaptation_v2_custom_class_reference]
+import io
+
+from google.cloud.speech_v2 import SpeechClient
+from google.cloud.speech_v2.types import cloud_speech
+
+
+def adaptation_v2_custom_class_reference(
+    project_id: str,
+    recognizer_id: str,
+    phrase_set_id: str,
+    custom_class_id: str,
+    audio_file: str,
+) -> cloud_speech.RecognizeResponse:
+    """Transcribe audio using a PhraseSet that references a stored CustomClass.
+
+    Creates a Recognizer, a CustomClass and a PhraseSet under
+    ``projects/{project_id}/locations/global``, then sends a recognition
+    request whose adaptation points at the PhraseSet by resource name.
+    None of the created resources are deleted here; cleanup is left to the
+    caller.
+
+    Args:
+        project_id: Google Cloud project used to build the parent resource path.
+        recognizer_id: ID to assign to the new Recognizer.
+        phrase_set_id: ID to assign to the new PhraseSet.
+        custom_class_id: ID to assign to the new CustomClass.
+        audio_file: Path to a local audio file; it is read as raw bytes.
+
+    Returns:
+        The response returned by ``SpeechClient.recognize``.
+    """
+    # Reads a file as bytes. Done first so that an unreadable path fails
+    # before any cloud resource has been created.
+    with io.open(audio_file, "rb") as f:
+        content = f.read()
+
+    parent = f"projects/{project_id}/locations/global"
+
+    # Instantiates a client
+    client = SpeechClient()
+
+    # Creates a Recognizer
+    request = cloud_speech.CreateRecognizerRequest(
+        parent=parent,
+        recognizer_id=recognizer_id,
+        recognizer=cloud_speech.Recognizer(
+            language_codes=["en-US"], model="latest_short"
+        ),
+    )
+    operation = client.create_recognizer(request=request)
+    recognizer = operation.result()
+
+    # Create a persistent CustomClass to reference in phrases
+    request = cloud_speech.CreateCustomClassRequest(
+        parent=parent,
+        custom_class_id=custom_class_id,
+        custom_class=cloud_speech.CustomClass(items=[{"value": "Keem"}]),
+    )
+    operation = client.create_custom_class(request=request)
+    custom_class = operation.result()
+
+    # Create a persistent PhraseSet to reference in a recognition request.
+    # The phrase value is "${<custom class resource name>}": the doubled
+    # braces in the f-string emit the literal "{" and "}".
+    request = cloud_speech.CreatePhraseSetRequest(
+        parent=parent,
+        phrase_set_id=phrase_set_id,
+        phrase_set=cloud_speech.PhraseSet(
+            phrases=[{"value": f"${{{custom_class.name}}}", "boost": 20}]
+        ),
+    )
+    operation = client.create_phrase_set(request=request)
+    phrase_set = operation.result()
+
+    # Add a reference of the PhraseSet into the recognition request
+    adaptation = cloud_speech.SpeechAdaptation(
+        phrase_sets=[
+            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
+                phrase_set=phrase_set.name
+            )
+        ]
+    )
+    config = cloud_speech.RecognitionConfig(
+        auto_decoding_config={}, adaptation=adaptation
+    )
+
+    print(custom_class)
+    print(phrase_set)
+    print(config)
+
+    request = cloud_speech.RecognizeRequest(
+        recognizer=recognizer.name, config=config, content=content
+    )
+
+    # Transcribes the audio into text
+    response = client.recognize(request=request)
+
+    for result in response.results:
+        print(f"Transcript: {result.alternatives[0].transcript}")
+
+    return response
+# [END speech_adaptation_v2_custom_class_reference]
+
+
+if __name__ == "__main__":
+    # The sample function takes five required arguments, so they are read
+    # from the command line rather than calling it with none.
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description=(
+            "Transcribe audio using a PhraseSet that references a CustomClass."
+        )
+    )
+    parser.add_argument("project_id", help="Google Cloud project ID")
+    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
+    parser.add_argument("phrase_set_id", help="ID for the PhraseSet to create")
+    parser.add_argument("custom_class_id", help="ID for the CustomClass to create")
+    parser.add_argument("audio_file", help="Path to the local audio file")
+    args = parser.parse_args()
+
+    adaptation_v2_custom_class_reference(
+        args.project_id,
+        args.recognizer_id,
+        args.phrase_set_id,
+        args.custom_class_id,
+        args.audio_file,
+    )
diff --git a/speech/snippets/adaptation_v2_custom_class_reference_test.py b/speech/snippets/adaptation_v2_custom_class_reference_test.py
@@ -0,0 +1,70 @@
+# Copyright 2022, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from uuid import uuid4
+
+from google.cloud.speech_v2 import SpeechClient
+from google.cloud.speech_v2.types import cloud_speech
+
+import adaptation_v2_custom_class_reference
+
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
+
+
+def delete_recognizer(name):
+    """Send a delete request for the Recognizer whose resource name is ``name``."""
+    speech_client = SpeechClient()
+    speech_client.delete_recognizer(
+        request=cloud_speech.DeleteRecognizerRequest(name=name)
+    )
+
+
+def delete_phrase_set(name):
+    """Send a delete request for the PhraseSet whose resource name is ``name``."""
+    speech_client = SpeechClient()
+    speech_client.delete_phrase_set(
+        request=cloud_speech.DeletePhraseSetRequest(name=name)
+    )
+
+
+def delete_custom_class(name):
+    """Send a delete request for the CustomClass whose resource name is ``name``."""
+    speech_client = SpeechClient()
+    speech_client.delete_custom_class(
+        request=cloud_speech.DeleteCustomClassRequest(name=name)
+    )
+
+
+def test_adaptation_v2_custom_class_reference(capsys):
+    """Run the custom-class-reference sample and check the transcript.
+
+    The project is read from the ``GOOGLE_CLOUD_PROJECT`` environment
+    variable. Resource IDs are randomized with ``uuid4`` for each run.
+    """
+    # Imported here so the test does not rely on a module-level import.
+    from contextlib import ExitStack
+
+    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
+    parent = f"projects/{project_id}/locations/global"
+
+    recognizer_id = "recognizer-" + str(uuid4())
+    phrase_set_id = "phrase-set-" + str(uuid4())
+    custom_class_id = "custom-class-" + str(uuid4())
+
+    with ExitStack() as cleanup:
+        # Callbacks run in reverse registration order (recognizer, phrase
+        # set, custom class) and each one runs even when the sample call or
+        # the assertion raises, so a failing test does not skip cleanup.
+        # NOTE(review): if the sample fails before a resource exists, its
+        # delete call will presumably raise as well - confirm that is
+        # acceptable for CI output.
+        cleanup.callback(
+            delete_custom_class, f"{parent}/customClasses/{custom_class_id}"
+        )
+        cleanup.callback(
+            delete_phrase_set, f"{parent}/phraseSets/{phrase_set_id}"
+        )
+        cleanup.callback(
+            delete_recognizer, f"{parent}/recognizers/{recognizer_id}"
+        )
+
+        response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
+            project_id,
+            recognizer_id,
+            phrase_set_id,
+            custom_class_id,
+            os.path.join(RESOURCES, "baby_keem.wav"),
+        )
+
+        assert re.search(
+            r"play Baby Keem",
+            response.results[0].alternatives[0].transcript,
+            re.DOTALL | re.I,
+        )
diff --git a/speech/snippets/adaptation_v2_inline_custom_class.py b/speech/snippets/adaptation_v2_inline_custom_class.py
@@ -0,0 +1,73 @@
+# Copyright 2022 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# [START speech_adaptation_v2_inline_custom_class]
+import io
+
+from google.cloud.speech_v2 import SpeechClient
+from google.cloud.speech_v2.types import cloud_speech
+
+
+def adaptation_v2_inline_custom_class(
+    project_id: str, recognizer_id: str, audio_file: str
+) -> cloud_speech.RecognizeResponse:
+    """Transcribe audio using an inline PhraseSet and an inline CustomClass.
+
+    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
+    sends a recognition request whose adaptation carries both the phrase set
+    and the custom class in the request itself. The Recognizer is not deleted
+    here; cleanup is left to the caller.
+
+    Args:
+        project_id: Google Cloud project used to build the parent resource path.
+        recognizer_id: ID to assign to the new Recognizer.
+        audio_file: Path to a local audio file; it is read as raw bytes.
+
+    Returns:
+        The response returned by ``SpeechClient.recognize``.
+    """
+    # Reads a file as bytes. Done first so that an unreadable path fails
+    # before the Recognizer has been created.
+    with io.open(audio_file, "rb") as f:
+        content = f.read()
+
+    # Instantiates a client
+    client = SpeechClient()
+
+    # Creates a Recognizer
+    request = cloud_speech.CreateRecognizerRequest(
+        parent=f"projects/{project_id}/locations/global",
+        recognizer_id=recognizer_id,
+        recognizer=cloud_speech.Recognizer(
+            language_codes=["en-US"], model="latest_short"
+        ),
+    )
+    operation = client.create_recognizer(request=request)
+    recognizer = operation.result()
+
+    # Build inline phrase set to produce a more accurate transcript.
+    # The phrase "${keem}" refers to the inline custom class named "keem".
+    phrase_set = cloud_speech.PhraseSet(
+        phrases=[{"value": "${keem}", "boost": 20}]
+    )
+    custom_class = cloud_speech.CustomClass(
+        name="keem", items=[{"value": "Keem"}]
+    )
+    adaptation = cloud_speech.SpeechAdaptation(
+        phrase_sets=[
+            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
+                inline_phrase_set=phrase_set
+            )
+        ],
+        custom_classes=[custom_class],
+    )
+    config = cloud_speech.RecognitionConfig(
+        auto_decoding_config={}, adaptation=adaptation
+    )
+
+    request = cloud_speech.RecognizeRequest(
+        recognizer=recognizer.name, config=config, content=content
+    )
+
+    # Transcribes the audio into text
+    response = client.recognize(request=request)
+
+    for result in response.results:
+        print(f"Transcript: {result.alternatives[0].transcript}")
+
+    return response
+# [END speech_adaptation_v2_inline_custom_class]
+
+
+if __name__ == "__main__":
+    # The sample function takes three required arguments, so they are read
+    # from the command line rather than calling it with none.
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description=(
+            "Transcribe audio using an inline PhraseSet and inline CustomClass."
+        )
+    )
+    parser.add_argument("project_id", help="Google Cloud project ID")
+    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
+    parser.add_argument("audio_file", help="Path to the local audio file")
+    args = parser.parse_args()
+
+    adaptation_v2_inline_custom_class(
+        args.project_id, args.recognizer_id, args.audio_file
+    )
diff --git a/speech/snippets/adaptation_v2_inline_custom_class_test.py b/speech/snippets/adaptation_v2_inline_custom_class_test.py
@@ -0,0 +1,48 @@
+# Copyright 2022, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+from uuid import uuid4
+
+from google.cloud.speech_v2 import SpeechClient
+from google.cloud.speech_v2.types import cloud_speech
+
+import adaptation_v2_inline_custom_class
+
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
+
+
+def delete_recognizer(name):
+    """Send a delete request for the Recognizer whose resource name is ``name``."""
+    speech_client = SpeechClient()
+    speech_client.delete_recognizer(
+        request=cloud_speech.DeleteRecognizerRequest(name=name)
+    )
+
+
+def test_adaptation_v2_inline_custom_class(capsys):
+    """Run the inline-custom-class sample and check the transcript.
+
+    The project is read from the ``GOOGLE_CLOUD_PROJECT`` environment
+    variable. The Recognizer ID is randomized with ``uuid4`` for each run.
+    """
+    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
+
+    recognizer_id = "recognizer-" + str(uuid4())
+
+    try:
+        response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
+            project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
+        )
+
+        assert re.search(
+            r"play Baby Keem",
+            response.results[0].alternatives[0].transcript,
+            re.DOTALL | re.I,
+        )
+    finally:
+        # Runs whether or not the sample call or the assertion raised, so a
+        # failing test does not skip Recognizer cleanup.
+        delete_recognizer(
+            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
+        )
diff --git a/speech/snippets/adaptation_v2_inline_phrase_set.py b/speech/snippets/adaptation_v2_inline_phrase_set.py
@@ -0,0 +1,71 @@
+# Copyright 2022 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# [START speech_adaptation_v2_inline_phrase_set]
+import io
+
+from google.cloud.speech_v2 import SpeechClient
+from google.cloud.speech_v2.types import cloud_speech
+
+
+def adaptation_v2_inline_phrase_set(
+    project_id: str, recognizer_id: str, audio_file: str
+) -> cloud_speech.RecognizeResponse:
+    """Transcribe audio using a PhraseSet supplied inline in the request.
+
+    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
+    sends a recognition request whose adaptation carries the phrase set in
+    the request itself. The Recognizer is not deleted here; cleanup is left
+    to the caller.
+
+    Args:
+        project_id: Google Cloud project used to build the parent resource path.
+        recognizer_id: ID to assign to the new Recognizer.
+        audio_file: Path to a local audio file; it is read as raw bytes.
+
+    Returns:
+        The response returned by ``SpeechClient.recognize``.
+    """
+    # Reads a file as bytes. Done first so that an unreadable path fails
+    # before the Recognizer has been created.
+    with io.open(audio_file, "rb") as f:
+        content = f.read()
+
+    # Instantiates a client
+    client = SpeechClient()
+
+    # Creates a Recognizer
+    request = cloud_speech.CreateRecognizerRequest(
+        parent=f"projects/{project_id}/locations/global",
+        recognizer_id=recognizer_id,
+        recognizer=cloud_speech.Recognizer(
+            language_codes=["en-US"], model="latest_short"
+        ),
+    )
+    operation = client.create_recognizer(request=request)
+    recognizer = operation.result()
+
+    # Build inline phrase set to produce a more accurate transcript
+    phrase_set = cloud_speech.PhraseSet(
+        phrases=[{"value": "Keem", "boost": 10}]
+    )
+    adaptation = cloud_speech.SpeechAdaptation(
+        phrase_sets=[
+            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
+                inline_phrase_set=phrase_set
+            )
+        ]
+    )
+    config = cloud_speech.RecognitionConfig(
+        auto_decoding_config={}, adaptation=adaptation
+    )
+
+    request = cloud_speech.RecognizeRequest(
+        recognizer=recognizer.name, config=config, content=content
+    )
+
+    # Transcribes the audio into text
+    response = client.recognize(request=request)
+
+    for result in response.results:
+        print(f"Transcript: {result.alternatives[0].transcript}")
+
+    return response
+# [END speech_adaptation_v2_inline_phrase_set]
+
+
+if __name__ == "__main__":
+    # The sample function takes three required arguments, so they are read
+    # from the command line rather than calling it with none.
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Transcribe audio using a PhraseSet supplied inline."
+    )
+    parser.add_argument("project_id", help="Google Cloud project ID")
+    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
+    parser.add_argument("audio_file", help="Path to the local audio file")
+    args = parser.parse_args()
+
+    adaptation_v2_inline_phrase_set(
+        args.project_id, args.recognizer_id, args.audio_file
+    )