diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md
index d7bc3a3c43c..c0397107edb 100644
--- a/speech/cloud-client/README.md
+++ b/speech/cloud-client/README.md
@@ -24,3 +24,15 @@ You can then run a given `ClassName` via:
 ### Transcribe a local audio file (using the quickstart sample)
 
     mvn exec:java -Dexec.mainClass=com.example.speech.QuickstartSample
+
+### Transcribe a local audio file (using the recognize sample)
+```
+    mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
+    -Dexec.args="syncrecognize ./resources/audio.raw"
+```
+
+### Transcribe a remote audio file (using the recognize sample)
+```
+    mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
+    -Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'"
+```
diff --git a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java
index 23da6b476d6..73e234836f9 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java
@@ -64,6 +64,7 @@ public static void main(String... args) throws Exception {
         System.out.printf("Transcription: %s%n", alternative.getTranscript());
       }
     }
+    speech.close();
   }
 }
 // [END speech_quickstart]
diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
new file mode 100644
index 00000000000..6b6abef94d2
--- /dev/null
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -0,0 +1,234 @@
+/*
+  Copyright 2017, Google Inc.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+package com.example.speech;
+
+import com.google.api.gax.grpc.OperationFuture;
+import com.google.cloud.speech.spi.v1beta1.SpeechClient;
+import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
+import com.google.cloud.speech.v1beta1.RecognitionAudio;
+import com.google.cloud.speech.v1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
+import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
+import com.google.protobuf.ByteString;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+/**
+ * Command-line sample that transcribes short audio clips with the Cloud Speech API, either
+ * from a local raw PCM file or from a FLAC file stored in Cloud Storage.
+ */
+public class Recognize {
+  /** Sample rate, in hertz, of the audio sent by every request in this sample. */
+  private static final int SAMPLE_RATE_HERTZ = 16000;
+
+  /** Delay, in milliseconds, between two checks of a pending asynchronous operation. */
+  private static final long POLL_INTERVAL_MILLIS = 200;
+
+  /**
+   * Runs the command given on the command line against the given audio path.
+   *
+   * @param args the command ({@code syncrecognize} or {@code asyncrecognize}) followed by a
+   *     local file path or a {@code gs://} URI; usage is printed when either is missing or
+   *     the command is unknown.
+   * @throws Exception if the audio cannot be read or the recognition request fails.
+   */
+  public static void main(String... args) throws Exception {
+    // Both the command and the path are required; a lone command used to fall through with
+    // an empty path and fail while trying to read it.
+    if (args.length < 2) {
+      printUsage();
+      return;
+    }
+    String command = args[0];
+    String path = args[1];
+    boolean isGcsUri = path.startsWith("gs://");
+
+    // Use command and GCS path pattern to invoke transcription.
+    if (command.equals("syncrecognize")) {
+      if (isGcsUri) {
+        syncRecognizeGcs(path);
+      } else {
+        syncRecognizeFile(path);
+      }
+    } else if (command.equals("asyncrecognize")) {
+      if (isGcsUri) {
+        asyncRecognizeGcs(path);
+      } else {
+        asyncRecognizeFile(path);
+      }
+    } else {
+      // Report unknown commands instead of exiting silently.
+      System.out.printf("Unknown command: %s%n", command);
+      printUsage();
+    }
+  }
+
+  /**
+   * Performs speech recognition on raw PCM audio and prints the transcription.
+   *
+   * @param fileName the path to a PCM audio file to transcribe.
+   * @throws Exception if the file cannot be read or the recognition request fails.
+   */
+  public static void syncRecognizeFile(String fileName) throws Exception {
+    // Read the audio before opening the client so a bad path cannot leak a client.
+    ByteString audioBytes = readAudioBytes(fileName);
+
+    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS; try-with-resources closes it
+    // even when the request throws.
+    try (SpeechClient speech = SpeechClient.create()) {
+      // Configure request with local raw PCM audio
+      RecognitionConfig config = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.LINEAR16)
+          .setSampleRate(SAMPLE_RATE_HERTZ)
+          .build();
+      RecognitionAudio audio = RecognitionAudio.newBuilder()
+          .setContent(audioBytes)
+          .build();
+
+      // Use blocking call to get audio transcript
+      SyncRecognizeResponse response = speech.syncRecognize(config, audio);
+      printTranscriptions(response.getResultsList());
+    }
+  }
+
+  /**
+   * Performs speech recognition on remote FLAC file and prints the transcription.
+   *
+   * @param gcsUri the path to the remote FLAC audio file to transcribe.
+   * @throws Exception if the recognition request fails.
+   */
+  public static void syncRecognizeGcs(String gcsUri) throws Exception {
+    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS; try-with-resources closes it
+    // even when the request throws.
+    try (SpeechClient speech = SpeechClient.create()) {
+      // Builds the request for remote FLAC file
+      RecognitionConfig config = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.FLAC)
+          .setSampleRate(SAMPLE_RATE_HERTZ)
+          .build();
+      RecognitionAudio audio = RecognitionAudio.newBuilder()
+          .setUri(gcsUri)
+          .build();
+
+      // Use blocking call for getting audio transcript
+      SyncRecognizeResponse response = speech.syncRecognize(config, audio);
+      printTranscriptions(response.getResultsList());
+    }
+  }
+
+  /**
+   * Performs non-blocking speech recognition on raw PCM audio and prints
+   * the transcription.
+   *
+   * @param fileName the path to a PCM audio file to transcribe.
+   * @throws Exception if the file cannot be read or the recognition request fails.
+   */
+  public static void asyncRecognizeFile(String fileName) throws Exception {
+    // Read the audio before opening the client so a bad path cannot leak a client.
+    ByteString audioBytes = readAudioBytes(fileName);
+
+    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS; try-with-resources closes it
+    // even when the request throws.
+    try (SpeechClient speech = SpeechClient.create()) {
+      // Configure request with local raw PCM audio
+      RecognitionConfig config = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.LINEAR16)
+          .setSampleRate(SAMPLE_RATE_HERTZ)
+          .build();
+      RecognitionAudio audio = RecognitionAudio.newBuilder()
+          .setContent(audioBytes)
+          .build();
+
+      // Use non-blocking call for getting file transcription
+      OperationFuture<AsyncRecognizeResponse> response =
+          speech.asyncRecognizeAsync(config, audio);
+      printTranscriptions(waitForResponse(response).getResultsList());
+    }
+  }
+
+  /**
+   * Performs non-blocking speech recognition on remote FLAC file and prints
+   * the transcription.
+   *
+   * @param gcsUri the path to the remote FLAC audio file to transcribe.
+   * @throws Exception if the recognition request fails.
+   */
+  public static void asyncRecognizeGcs(String gcsUri) throws Exception {
+    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS; try-with-resources closes it
+    // even when the request throws.
+    try (SpeechClient speech = SpeechClient.create()) {
+      // Configure remote file request for FLAC file
+      RecognitionConfig config = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.FLAC)
+          .setSampleRate(SAMPLE_RATE_HERTZ)
+          .build();
+      RecognitionAudio audio = RecognitionAudio.newBuilder()
+          .setUri(gcsUri)
+          .build();
+
+      // Use non-blocking call for getting file transcription
+      OperationFuture<AsyncRecognizeResponse> response =
+          speech.asyncRecognizeAsync(config, audio);
+      printTranscriptions(waitForResponse(response).getResultsList());
+    }
+  }
+
+  /** Prints the command-line usage of this sample. */
+  private static void printUsage() {
+    System.out.println("Usage:");
+    System.out.printf(
+        "\tjava %s \"<command>\" \"<path>\"\n"
+        + "Commands:\n"
+        + "\tsyncrecognize | asyncrecognize\n"
+        + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+        + "for a Cloud Storage resource (gs://...)\n",
+        Recognize.class.getCanonicalName());
+  }
+
+  /** Reads the whole audio file at {@code fileName} into memory. */
+  private static ByteString readAudioBytes(String fileName) throws IOException {
+    Path path = Paths.get(fileName);
+    byte[] data = Files.readAllBytes(path);
+    return ByteString.copyFrom(data);
+  }
+
+  /** Polls {@code operation} until it is done, then returns its response. */
+  private static AsyncRecognizeResponse waitForResponse(
+      OperationFuture<AsyncRecognizeResponse> operation) throws Exception {
+    while (!operation.isDone()) {
+      System.out.println("Waiting for response...");
+      Thread.sleep(POLL_INTERVAL_MILLIS);
+    }
+    return operation.get();
+  }
+
+  /** Prints every alternative transcript of every recognition result. */
+  private static void printTranscriptions(List<SpeechRecognitionResult> results) {
+    for (SpeechRecognitionResult result : results) {
+      List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
+      for (SpeechRecognitionAlternative alternative : alternatives) {
+        System.out.printf("Transcription: %s%n", alternative.getTranscript());
+      }
+    }
+  }
+}
diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
new file mode 100644
index 00000000000..29ec44934f7
--- /dev/null
+++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
@@ -0,0 +1,90 @@
+/*
+  Copyright 2017, Google, Inc.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+
+/**
+ * Integration tests for the speech {@link Recognize} sample; each test captures standard output
+ * and checks it for the expected transcript.
+ */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class RecognizeIT {
+  private static final String EXPECTED_TRANSCRIPT = "how old is the Brooklyn Bridge";
+
+  private ByteArrayOutputStream bout;
+  private PrintStream out;
+  // Stream that System.out pointed at before the test, restored in tearDown.
+  private PrintStream originalOut;
+
+  // The path to the audio file to transcribe
+  private String fileName = "./resources/audio.raw";
+  private String gcsPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
+
+  @Before
+  public void setUp() {
+    originalOut = System.out;
+    bout = new ByteArrayOutputStream();
+    out = new PrintStream(bout);
+    System.setOut(out);
+  }
+
+  @After
+  public void tearDown() {
+    // Put the real stream back: leaving System.out set to null makes any later
+    // System.out call in the same JVM throw a NullPointerException.
+    System.setOut(originalOut);
+  }
+
+  @Test
+  public void testRecognizeFile() throws Exception {
+    Recognize.syncRecognizeFile(fileName);
+    String got = bout.toString();
+    assertThat(got).contains(EXPECTED_TRANSCRIPT);
+  }
+
+  @Test
+  public void testRecognizeGcs() throws Exception {
+    Recognize.syncRecognizeGcs(gcsPath);
+    String got = bout.toString();
+    assertThat(got).contains(EXPECTED_TRANSCRIPT);
+  }
+
+  @Test
+  public void testAsyncRecognizeFile() throws Exception {
+    Recognize.asyncRecognizeFile(fileName);
+    String got = bout.toString();
+    assertThat(got).contains(EXPECTED_TRANSCRIPT);
+  }
+
+  @Test
+  public void testAsyncRecognizeGcs() throws Exception {
+    Recognize.asyncRecognizeGcs(gcsPath);
+    String got = bout.toString();
+    assertThat(got).contains(EXPECTED_TRANSCRIPT);
+  }
+}