From 4eba6f2b6fe73bea33d95b958ef7817dd85290ab Mon Sep 17 00:00:00 2001 From: Gus Class Date: Tue, 7 Feb 2017 12:20:34 -0800 Subject: [PATCH 1/3] Adds sync / async examples for local and remote files --- speech/cloud-client/README.md | 12 ++ .../com/example/speech/QuickstartSample.java | 1 + .../java/com/example/speech/Recognize.java | 185 ++++++++++++++++++ .../java/com/example/speech/RecognizeIT.java | 82 ++++++++ 4 files changed, 280 insertions(+) create mode 100644 speech/cloud-client/src/main/java/com/example/speech/Recognize.java create mode 100644 speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md index d7bc3a3c43c..c0397107edb 100644 --- a/speech/cloud-client/README.md +++ b/speech/cloud-client/README.md @@ -24,3 +24,15 @@ You can then run a given `ClassName` via: ### Transcribe a local audio file (using the quickstart sample) mvn exec:java -Dexec.mainClass=com.example.speech.QuickstartSample + +### Transcribe a local audio file (using the recognize sample) +``` + mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \ + -Dexec.args="syncrecognize ./resources/audio.raw" +``` + +### Transcribe a remote audio file (using the recognize sample) +``` + mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \ + -Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'" +``` diff --git a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java index 23da6b476d6..73e234836f9 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java +++ b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java @@ -64,6 +64,7 @@ public static void main(String... 
args) throws Exception { System.out.printf("Transcription: %s%n", alternative.getTranscript()); } } + speech.close(); } } // [END speech_quickstart] diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java new file mode 100644 index 00000000000..ed5acdd06bf --- /dev/null +++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -0,0 +1,185 @@ +/* + Copyright 2017, Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package com.example.speech; + +import com.google.api.gax.grpc.OperationFuture; +import com.google.cloud.speech.spi.v1beta1.SpeechClient; +import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse; +import com.google.cloud.speech.v1beta1.RecognitionAudio; +import com.google.cloud.speech.v1beta1.RecognitionConfig; +import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1beta1.SpeechRecognitionResult; +import com.google.cloud.speech.v1beta1.SyncRecognizeResponse; +import com.google.protobuf.ByteString; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +public class Recognize { + public static void main(String... 
args) throws Exception { + if (args.length < 1) { + System.out.println("Usage:"); + System.out.printf( + "\tjava %s \"\" \"\"\n" + + "Commands:\n" + + "\tsyncrecognize | asyncrecognize\n" + + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI " + + "for a Cloud Storage resource (gs://...)\n", + Recognize.class.getCanonicalName()); + return; + } + String command = args[0]; + String path = args.length > 1 ? args[1] : ""; + + if (command.equals("syncrecognize")) { + if (path.startsWith("gs://")) { + syncRecognizeGcs(path); + } else { + syncRecognizeFile(path); + } + } else if (command.equals("asyncrecognize")) { + if (path.startsWith("gs://")) { + asyncRecognizeGcs(path); + } else { + asyncRecognizeFile(path); + } + } + + + } + + public static void syncRecognizeFile(String fileName) throws Exception, IOException { + SpeechClient speech = SpeechClient.create(); + + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setSampleRate(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setContent(audioBytes) + .build(); + + SyncRecognizeResponse response = speech.syncRecognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result: results) { + List alternatives = result.getAlternativesList(); + for (SpeechRecognitionAlternative alternative: alternatives) { + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + speech.close(); + } + + public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException { + // Instantiates a client + SpeechClient speech = SpeechClient.create(); + + // Builds the sync recognize request + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setSampleRate(16000) + .build(); + RecognitionAudio audio = 
RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + // Performs speech recognition on the audio file + SyncRecognizeResponse response = speech.syncRecognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result: results) { + List alternatives = result.getAlternativesList(); + for (SpeechRecognitionAlternative alternative: alternatives) { + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + + + speech.close(); + } + + public static void asyncRecognizeFile(String fileName) throws Exception, IOException { + SpeechClient speech = SpeechClient.create(); + + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setSampleRate(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setContent(audioBytes) + .build(); + + OperationFuture response = speech.asyncRecognizeAsync(config, audio); + + while(!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(200); + } + + List results = response.get().getResultsList(); + + for (SpeechRecognitionResult result: results) { + List alternatives = result.getAlternativesList(); + for (SpeechRecognitionAlternative alternative: alternatives) { + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + speech.close(); + } + + public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException { + SpeechClient speech = SpeechClient.create(); + + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setSampleRate(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + OperationFuture response = speech.asyncRecognizeAsync(config, audio); + + while(!response.isDone()) { + 
System.out.println("Waiting for response..."); + Thread.sleep(200); + } + + List results = response.get().getResultsList(); + + for (SpeechRecognitionResult result: results) { + List alternatives = result.getAlternativesList(); + for (SpeechRecognitionAlternative alternative: alternatives) { + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + speech.close(); + } +} diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java new file mode 100644 index 00000000000..29ec44934f7 --- /dev/null +++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java @@ -0,0 +1,82 @@ +/* + Copyright 2017, Google, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package com.example.speech; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +/** + * Tests for speech recognize sample. 
+ */ +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class RecognizeIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + // The path to the audio file to transcribe + private String fileName = "./resources/audio.raw"; + private String gcsPath = "gs://cloud-samples-tests/speech/brooklyn.flac"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testRecognizeFile() throws Exception { + Recognize.syncRecognizeFile(fileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testRecognizeGcs() throws Exception { + Recognize.syncRecognizeGcs(gcsPath); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testAsyncRecognizeFile() throws Exception { + Recognize.asyncRecognizeFile(fileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testAsyncRecognizeGcs() throws Exception { + Recognize.asyncRecognizeGcs(gcsPath); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } +} From 9fb65f54f131c4def2082404341d64241dfb37a8 Mon Sep 17 00:00:00 2001 From: Gus Class Date: Tue, 7 Feb 2017 12:48:15 -0800 Subject: [PATCH 2/3] Fixes whitespace around while blocks --- .../src/main/java/com/example/speech/Recognize.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java index ed5acdd06bf..a8391403af3 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java +++ 
b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -48,7 +48,7 @@ public static void main(String... args) throws Exception { } String command = args[0]; String path = args.length > 1 ? args[1] : ""; - + if (command.equals("syncrecognize")) { if (path.startsWith("gs://")) { syncRecognizeGcs(path); @@ -62,7 +62,7 @@ public static void main(String... args) throws Exception { asyncRecognizeFile(path); } } - + } @@ -138,7 +138,7 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep OperationFuture response = speech.asyncRecognizeAsync(config, audio); - while(!response.isDone()) { + while (!response.isDone()) { System.out.println("Waiting for response..."); Thread.sleep(200); } @@ -167,7 +167,7 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio OperationFuture response = speech.asyncRecognizeAsync(config, audio); - while(!response.isDone()) { + while (!response.isDone()) { System.out.println("Waiting for response..."); Thread.sleep(200); } From 7933aa6465a65374bed4e9d497b377ecb80f2b5a Mon Sep 17 00:00:00 2001 From: Gus Class Date: Tue, 7 Feb 2017 15:53:28 -0800 Subject: [PATCH 3/3] Adds some basic javadocs and comments --- .../java/com/example/speech/Recognize.java | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java index a8391403af3..6b6abef94d2 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -49,6 +49,7 @@ public static void main(String... args) throws Exception { String command = args[0]; String path = args.length > 1 ? args[1] : ""; + // Use command and GCS path pattern to invoke transcription. 
if (command.equals("syncrecognize")) { if (path.startsWith("gs://")) { syncRecognizeGcs(path); @@ -62,10 +63,13 @@ public static void main(String... args) throws Exception { asyncRecognizeFile(path); } } - - } + /** + * Performs speech recognition on raw PCM audio and prints the transcription. + * + * @param fileName the path to a PCM audio file to transcribe. + */ public static void syncRecognizeFile(String fileName) throws Exception, IOException { SpeechClient speech = SpeechClient.create(); @@ -73,6 +77,7 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept byte[] data = Files.readAllBytes(path); ByteString audioBytes = ByteString.copyFrom(data); + // Configure request with local raw PCM audio RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) .setSampleRate(16000) @@ -81,6 +86,7 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept .setContent(audioBytes) .build(); + // Use blocking call to get audio transcript SyncRecognizeResponse response = speech.syncRecognize(config, audio); List results = response.getResultsList(); @@ -93,11 +99,16 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept speech.close(); } + /** + * Performs speech recognition on remote FLAC file and prints the transcription. + * + * @param gcsUri the path to the remote FLAC audio file to transcribe. 
+ */ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException { - // Instantiates a client + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS SpeechClient speech = SpeechClient.create(); - // Builds the sync recognize request + // Builds the request for remote FLAC file RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.FLAC) .setSampleRate(16000) @@ -106,7 +117,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException .setUri(gcsUri) .build(); - // Performs speech recognition on the audio file + // Use blocking call for getting audio transcript SyncRecognizeResponse response = speech.syncRecognize(config, audio); List results = response.getResultsList(); @@ -116,18 +127,24 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException System.out.printf("Transcription: %s%n", alternative.getTranscript()); } } - - speech.close(); } + /** + * Performs speech recognition on raw PCM audio using the asynchronous API, + * polling until the operation completes, and prints the transcription. + * + * @param fileName the path to a PCM audio file to transcribe. 
+ */ public static void asyncRecognizeFile(String fileName) throws Exception, IOException { + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS SpeechClient speech = SpeechClient.create(); Path path = Paths.get(fileName); byte[] data = Files.readAllBytes(path); ByteString audioBytes = ByteString.copyFrom(data); + // Configure request with local raw PCM audio RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) .setSampleRate(16000) @@ -136,8 +153,8 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep .setContent(audioBytes) .build(); + // Use non-blocking call for getting file transcription OperationFuture response = speech.asyncRecognizeAsync(config, audio); - while (!response.isDone()) { System.out.println("Waiting for response..."); Thread.sleep(200); @@ -154,9 +171,17 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep speech.close(); } + /** + * Performs speech recognition on a remote FLAC file using the asynchronous API, + * polling until the operation completes, and prints the transcription. + * + * @param gcsUri the path to the remote FLAC audio file to transcribe. + */ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException { + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS SpeechClient speech = SpeechClient.create(); + // Configure remote file request for FLAC file RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.FLAC) .setSampleRate(16000) @@ -165,8 +190,8 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio .setUri(gcsUri) .build(); + // Use non-blocking call for getting file transcription OperationFuture response = speech.asyncRecognizeAsync(config, audio); - while (!response.isDone()) { System.out.println("Waiting for response..."); Thread.sleep(200);