diff --git a/speech/README.md b/speech/README.md
deleted file mode 100644
index 635274c71cd..00000000000
--- a/speech/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Cloud Speech API samples for Java
-
-These samples have moved to [googleapis/java-speech](https://github.com/googleapis/java-speech/tree/main/samples).
\ No newline at end of file
diff --git a/speech/pom.xml b/speech/pom.xml
new file mode 100644
index 00000000000..085ac51000c
--- /dev/null
+++ b/speech/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.example.speech</groupId>
+  <artifactId>google-cloud-speech-snippets</artifactId>
+  <packaging>jar</packaging>
+  <name>Google Cloud Speech Snippets</name>
+  <url>https://github.com/GoogleCloudPlatform/java-docs-samples/tree/main/speech</url>
+
+  <parent>
+    <groupId>com.google.cloud.samples</groupId>
+    <artifactId>shared-configuration</artifactId>
+    <version>1.2.0</version>
+  </parent>
+
+  <properties>
+    <maven.compiler.source>1.8</maven.compiler.source>
+    <maven.compiler.target>1.8</maven.compiler.target>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>com.google.cloud</groupId>
+        <artifactId>libraries-bom</artifactId>
+        <version>26.1.3</version>
+        <type>pom</type>
+        <scope>import</scope>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.json</groupId>
+      <artifactId>json</artifactId>
+      <version>20220924</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-speech</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-storage</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.5.0</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.13.2</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.truth</groupId>
+      <artifactId>truth</artifactId>
+      <version>1.1.3</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/speech/resources/Google_Gnome.wav b/speech/resources/Google_Gnome.wav
new file mode 100644
index 00000000000..2f497b7fbe7
Binary files /dev/null and b/speech/resources/Google_Gnome.wav differ
diff --git a/speech/resources/audio.raw b/speech/resources/audio.raw
new file mode 100644
index 00000000000..5ebf79d3c9c
Binary files /dev/null and b/speech/resources/audio.raw differ
diff --git a/speech/resources/commercial_mono.wav b/speech/resources/commercial_mono.wav
new file mode 100644
index 00000000000..e6b9ed434f9
Binary files /dev/null and b/speech/resources/commercial_mono.wav differ
diff --git a/speech/resources/commercial_stereo.wav b/speech/resources/commercial_stereo.wav
new file mode 100644
index 00000000000..467f3687702
Binary files /dev/null and b/speech/resources/commercial_stereo.wav differ
diff --git a/speech/src/main/java/com/example/speech/InfiniteStreamRecognize.java b/speech/src/main/java/com/example/speech/InfiniteStreamRecognize.java
new file mode 100644
index 00000000000..1695f08e63a
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/InfiniteStreamRecognize.java
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2018 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_infinite_streaming]
+
+import com.google.api.gax.rpc.ClientStream;
+import com.google.api.gax.rpc.ResponseObserver;
+import com.google.api.gax.rpc.StreamController;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.SpeechClient;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Duration;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.DataLine;
+import javax.sound.sampled.DataLine.Info;
+import javax.sound.sampled.TargetDataLine;
+
+public class InfiniteStreamRecognize {
+
+ private static final int STREAMING_LIMIT = 290000; // ~5 minutes
+
+ public static final String RED = "\033[0;31m";
+ public static final String GREEN = "\033[0;32m";
+ public static final String YELLOW = "\033[0;33m";
+
+ // Creating shared object
+ private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue<byte[]>();
+ private static TargetDataLine targetDataLine;
+ private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
+
+ private static int restartCounter = 0;
+ private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
+ private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
+ private static int resultEndTimeInMS = 0;
+ private static int isFinalEndTime = 0;
+ private static int finalRequestEndTime = 0;
+ private static boolean newStream = true;
+ private static double bridgingOffset = 0;
+ private static boolean lastTranscriptWasFinal = false;
+ private static StreamController referenceToStreamController;
+ private static ByteString tempByteString;
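+ // The fields above carry state across stream restarts: audioInput buffers the audio
+ // sent on the current stream, lastAudioInput holds the previous stream's audio so
+ // unfinalized speech can be re-sent, and bridgingOffset keeps the printed timestamps
+ // continuous across restarts.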
+
+ public static void main(String... args) {
+ InfiniteStreamRecognizeOptions options = InfiniteStreamRecognizeOptions.fromFlags(args);
+ if (options == null) {
+ // Could not parse.
+ System.out.println("Failed to parse options.");
+ System.exit(1);
+ }
+
+ try {
+ infiniteStreamingRecognize(options.langCode);
+ } catch (Exception e) {
+ System.out.println("Exception caught: " + e);
+ }
+ }
+
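+ /** Formats an elapsed time in milliseconds as a "minutes:seconds /" timestamp for transcript output. */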
+ public static String convertMillisToDate(double milliSeconds) {
+ long millis = (long) milliSeconds;
+ DecimalFormat format = new DecimalFormat();
+ format.setMinimumIntegerDigits(2);
+ return String.format(
+ "%s:%s /",
+ format.format(TimeUnit.MILLISECONDS.toMinutes(millis)),
+ format.format(
+ TimeUnit.MILLISECONDS.toSeconds(millis)
+ - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis))));
+ }
+
+ /** Performs infinite streaming speech recognition */
+ public static void infiniteStreamingRecognize(String languageCode) throws Exception {
+
+ // Microphone Input buffering
+ class MicBuffer implements Runnable {
+
+ @Override
+ public void run() {
+ System.out.println(YELLOW);
+ System.out.println("Start speaking...Press Ctrl-C to stop");
+ targetDataLine.start();
+ byte[] data = new byte[BYTES_PER_BUFFER];
+ while (targetDataLine.isOpen()) {
+ try {
+ int numBytesRead = targetDataLine.read(data, 0, data.length);
+ if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
+ continue;
+ }
+ sharedQueue.put(data.clone());
+ } catch (InterruptedException e) {
+ System.out.println("Microphone input buffering interrupted : " + e.getMessage());
+ }
+ }
+ }
+ }
+
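+ // MicBuffer is the producer half of a producer/consumer pair: it pushes raw microphone
+ // chunks onto sharedQueue, and the streaming loop below drains the queue and sends each
+ // chunk to the Speech API.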
+ // Creating microphone input buffer thread
+ MicBuffer micrunnable = new MicBuffer();
+ Thread micThread = new Thread(micrunnable);
+ ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
+ try (SpeechClient client = SpeechClient.create()) {
+ ClientStream<StreamingRecognizeRequest> clientStream;
+ responseObserver =
+ new ResponseObserver<StreamingRecognizeResponse>() {
+
+ ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
+
+ public void onStart(StreamController controller) {
+ referenceToStreamController = controller;
+ }
+
+ public void onResponse(StreamingRecognizeResponse response) {
+ responses.add(response);
+ StreamingRecognitionResult result = response.getResultsList().get(0);
+ Duration resultEndTime = result.getResultEndTime();
+ resultEndTimeInMS =
+ (int)
+ ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
+ double correctedTime =
+ resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
+
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ if (result.getIsFinal()) {
+ System.out.print(GREEN);
+ System.out.print("\033[2K\r");
+ System.out.printf(
+ "%s: %s [confidence: %.2f]\n",
+ convertMillisToDate(correctedTime),
+ alternative.getTranscript(),
+ alternative.getConfidence());
+ isFinalEndTime = resultEndTimeInMS;
+ lastTranscriptWasFinal = true;
+ } else {
+ System.out.print(RED);
+ System.out.print("\033[2K\r");
+ System.out.printf(
+ "%s: %s", convertMillisToDate(correctedTime), alternative.getTranscript());
+ lastTranscriptWasFinal = false;
+ }
+ }
+
+ public void onComplete() {}
+
+ public void onError(Throwable t) {}
+ };
+ clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
+
+ RecognitionConfig recognitionConfig =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+ .setLanguageCode(languageCode)
+ .setSampleRateHertz(16000)
+ .build();
+
+ StreamingRecognitionConfig streamingRecognitionConfig =
+ StreamingRecognitionConfig.newBuilder()
+ .setConfig(recognitionConfig)
+ .setInterimResults(true)
+ .build();
+
+ StreamingRecognizeRequest request =
+ StreamingRecognizeRequest.newBuilder()
+ .setStreamingConfig(streamingRecognitionConfig)
+ .build(); // The first request in a streaming call has to be a config
+
+ clientStream.send(request);
+
+ try {
+ // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
+ // bigEndian: false
+ AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
+ DataLine.Info targetInfo =
+ new Info(
+ TargetDataLine.class,
+ audioFormat); // Set the system information to read from the microphone audio
+ // stream
+
+ if (!AudioSystem.isLineSupported(targetInfo)) {
+ System.out.println("Microphone not supported");
+ System.exit(0);
+ }
+ // Target data line captures the audio stream the microphone produces.
+ targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
+ targetDataLine.open(audioFormat);
+ micThread.start();
+
+ long startTime = System.currentTimeMillis();
+
+ while (true) {
+
+ long estimatedTime = System.currentTimeMillis() - startTime;
+
+ if (estimatedTime >= STREAMING_LIMIT) {
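+ // The service limits how long a single stream may stay open, so near the limit we
+ // close this stream and open a new one; unfinalized audio from this stream is
+ // re-sent at the start of the next one.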
+
+ clientStream.closeSend();
+ referenceToStreamController.cancel(); // remove Observer
+
+ if (resultEndTimeInMS > 0) {
+ finalRequestEndTime = isFinalEndTime;
+ }
+ resultEndTimeInMS = 0;
+
+ lastAudioInput = audioInput;
+ audioInput = new ArrayList<ByteString>();
+
+ restartCounter++;
+
+ if (!lastTranscriptWasFinal) {
+ System.out.print('\n');
+ }
+
+ newStream = true;
+
+ clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
+
+ request =
+ StreamingRecognizeRequest.newBuilder()
+ .setStreamingConfig(streamingRecognitionConfig)
+ .build();
+
+ System.out.println(YELLOW);
+ System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
+
+ startTime = System.currentTimeMillis();
+
+ } else {
+
+ if ((newStream) && (lastAudioInput.size() > 0)) {
+ // if this is the first audio from a new request
+ // calculate amount of unfinalized audio from last request
+ // resend the audio to the speech client before incoming audio
+ double chunkTime = (double) STREAMING_LIMIT / lastAudioInput.size();
+ // ms length of each chunk in previous request audio arrayList
+ if (chunkTime != 0) {
+ if (bridgingOffset < 0) {
+ // bridging Offset accounts for time of resent audio
+ // calculated from last request
+ bridgingOffset = 0;
+ }
+ if (bridgingOffset > finalRequestEndTime) {
+ bridgingOffset = finalRequestEndTime;
+ }
+ int chunksFromMs =
+ (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
+ // chunks from MS is number of chunks to resend
+ bridgingOffset =
+ (int) Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
+ // set bridging offset for next request
+ for (int i = chunksFromMs; i < lastAudioInput.size(); i++) {
+ request =
+ StreamingRecognizeRequest.newBuilder()
+ .setAudioContent(lastAudioInput.get(i))
+ .build();
+ clientStream.send(request);
+ }
+ }
+ newStream = false;
+ }
+
+ tempByteString = ByteString.copyFrom(sharedQueue.take());
+
+ request =
+ StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
+
+ audioInput.add(tempByteString);
+ }
+
+ clientStream.send(request);
+ }
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+ }
+ }
+}
+// [END speech_transcribe_infinite_streaming]
diff --git a/speech/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java b/speech/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java
new file mode 100644
index 00000000000..909ff2be08c
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+public class InfiniteStreamRecognizeOptions {
+ String langCode = "en-US"; // defaults to US English
+
+ /** Constructs an InfiniteStreamRecognizeOptions instance from command-line flags. */
+ public static InfiniteStreamRecognizeOptions fromFlags(String[] args) {
+ Options options = new Options();
+ options.addOption(
+ Option.builder()
+ .type(String.class)
+ .longOpt("lang_code")
+ .hasArg()
+ .desc("Language code")
+ .build());
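+ // For example, pass --lang_code es-ES to transcribe Spanish; the default is en-US.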
+
+ CommandLineParser parser = new DefaultParser();
+ CommandLine commandLine;
+ try {
+ commandLine = parser.parse(options, args);
+ InfiniteStreamRecognizeOptions res = new InfiniteStreamRecognizeOptions();
+
+ if (commandLine.hasOption("lang_code")) {
+ res.langCode = commandLine.getOptionValue("lang_code");
+ }
+ return res;
+ } catch (ParseException e) {
+ System.err.println(e.getMessage());
+ return null;
+ }
+ }
+}
diff --git a/speech/src/main/java/com/example/speech/QuickstartSample.java b/speech/src/main/java/com/example/speech/QuickstartSample.java
new file mode 100644
index 00000000000..245d0d0b812
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/QuickstartSample.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_quickstart]
+// Imports the Google Cloud client library
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.SpeechRecognitionResult;
+import java.util.List;
+
+public class QuickstartSample {
+
+ /** Demonstrates using the Speech API to transcribe an audio file. */
+ public static void main(String... args) throws Exception {
+ // Instantiates a client
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // The path to the audio file to transcribe
+ String gcsUri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw";
+
+ // Builds the sync recognize request
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("en-US")
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Performs speech recognition on the audio file
+ RecognizeResponse response = speechClient.recognize(config, audio);
+ List<SpeechRecognitionResult> results = response.getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s%n", alternative.getTranscript());
+ }
+ }
+ }
+}
+// [END speech_quickstart]
diff --git a/speech/src/main/java/com/example/speech/Recognize.java b/speech/src/main/java/com/example/speech/Recognize.java
new file mode 100644
index 00000000000..ab060e43ea0
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/Recognize.java
@@ -0,0 +1,941 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.api.gax.longrunning.OperationTimedPollAlgorithm;
+import com.google.api.gax.retrying.RetrySettings;
+import com.google.api.gax.retrying.TimedRetryAlgorithm;
+import com.google.api.gax.rpc.ApiStreamObserver;
+import com.google.api.gax.rpc.BidiStreamingCallable;
+import com.google.api.gax.rpc.ClientStream;
+import com.google.api.gax.rpc.ResponseObserver;
+import com.google.api.gax.rpc.StreamController;
+import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
+import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.SpeechRecognitionResult;
+import com.google.cloud.speech.v1.SpeechSettings;
+import com.google.cloud.speech.v1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1.StreamingRecognitionResult;
+import com.google.cloud.speech.v1.StreamingRecognizeRequest;
+import com.google.cloud.speech.v1.StreamingRecognizeResponse;
+import com.google.cloud.speech.v1.WordInfo;
+import com.google.common.util.concurrent.SettableFuture;
+import com.google.protobuf.ByteString;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.DataLine;
+import javax.sound.sampled.DataLine.Info;
+import javax.sound.sampled.TargetDataLine;
+import org.threeten.bp.Duration;
+
+public class Recognize {
+
+ /** Run speech recognition tasks. */
+ public static void main(String... args) throws Exception {
+ if (args.length < 1) {
+ System.out.println("Usage:");
+ System.out.printf(
+ "\tjava %s \"\" \"\"\n"
+ + "Commands:\n"
+ + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+ + "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
+ + "\t| enhanced-model | model-selection | multi-channel\n"
+ + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ + "for a Cloud Storage resource (gs://...)\n",
+ Recognize.class.getCanonicalName());
+ return;
+ }
+ String command = args[0];
+ String path = args.length > 1 ? args[1] : "";
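+ // For example: "syncrecognize ./resources/audio.raw" transcribes the bundled sample file.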
+
+ // Use command and GCS path pattern to invoke transcription.
+ if (command.equals("syncrecognize")) {
+ if (path.startsWith("gs://")) {
+ syncRecognizeGcs(path);
+ } else {
+ syncRecognizeFile(path);
+ }
+ } else if (command.equals("wordoffsets")) {
+ if (path.startsWith("gs://")) {
+ asyncRecognizeWords(path);
+ } else {
+ syncRecognizeWords(path);
+ }
+ } else if (command.equals("asyncrecognize")) {
+ if (path.startsWith("gs://")) {
+ asyncRecognizeGcs(path);
+ } else {
+ asyncRecognizeFile(path);
+ }
+ } else if (command.equals("streamrecognize")) {
+ streamingRecognizeFile(path);
+ } else if (command.equals("micstreamrecognize")) {
+ streamingMicRecognize();
+ } else if (command.equals("auto-punctuation")) {
+ if (path.startsWith("gs://")) {
+ transcribeGcsWithAutomaticPunctuation(path);
+ } else {
+ transcribeFileWithAutomaticPunctuation(path);
+ }
+ } else if (command.equals("stream-punctuation")) {
+ streamingTranscribeWithAutomaticPunctuation(path);
+ } else if (command.equals("enhanced-model")) {
+ transcribeFileWithEnhancedModel(path);
+ } else if (command.equals("model-selection")) {
+ if (path.startsWith("gs://")) {
+ transcribeModelSelectionGcs(path);
+ } else {
+ transcribeModelSelection(path);
+ }
+ } else if (command.equals("multi-channel")) {
+ if (path.startsWith("gs://")) {
+ transcribeMultiChannelGcs(path);
+ } else {
+ transcribeMultiChannel(path);
+ }
+ }
+ }
+
+ // [START speech_transcribe_sync]
+ /**
+ * Performs speech recognition on raw PCM audio and prints the transcription.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void syncRecognizeFile(String fileName) throws Exception {
+ try (SpeechClient speech = SpeechClient.create()) {
+ Path path = Paths.get(fileName);
+ byte[] data = Files.readAllBytes(path);
+ ByteString audioBytes = ByteString.copyFrom(data);
+
+ // Configure request with local raw PCM audio
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();
+
+ // Use blocking call to get audio transcript
+ RecognizeResponse response = speech.recognize(config, audio);
+ List<SpeechRecognitionResult> results = response.getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s%n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_sync]
+
+ /**
+ * Performs sync recognize and prints word time offsets.
+ *
+ * @param fileName the path to a PCM audio file to transcribe get offsets on.
+ */
+ public static void syncRecognizeWords(String fileName) throws Exception {
+ try (SpeechClient speech = SpeechClient.create()) {
+ Path path = Paths.get(fileName);
+ byte[] data = Files.readAllBytes(path);
+ ByteString audioBytes = ByteString.copyFrom(data);
+
+ // Configure request with local raw PCM audio
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableWordTimeOffsets(true)
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();
+
+ // Use blocking call to get audio transcript
+ RecognizeResponse response = speech.recognize(config, audio);
+ List<SpeechRecognitionResult> results = response.getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s%n", alternative.getTranscript());
+ for (WordInfo wordInfo : alternative.getWordsList()) {
+ System.out.println(wordInfo.getWord());
+ System.out.printf(
+ "\t%s.%s sec - %s.%s sec\n",
+ wordInfo.getStartTime().getSeconds(),
+ wordInfo.getStartTime().getNanos() / 100000000,
+ wordInfo.getEndTime().getSeconds(),
+ wordInfo.getEndTime().getNanos() / 100000000);
+ }
+ }
+ }
+ }
+
+ // [START speech_transcribe_sync_gcs]
+ /**
+ * Performs speech recognition on a remote FLAC file and prints the transcription.
+ *
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
+ */
+ public static void syncRecognizeGcs(String gcsUri) throws Exception {
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+ // Builds the request for remote FLAC file
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use blocking call for getting audio transcript
+ RecognizeResponse response = speech.recognize(config, audio);
+ List<SpeechRecognitionResult> results = response.getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s%n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_sync_gcs]
+
+ // [START speech_transcribe_async]
+ /**
+ * Performs non-blocking speech recognition on raw PCM audio and prints the transcription. Note
+ * that transcription is limited to 60 seconds of audio.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void asyncRecognizeFile(String fileName) throws Exception {
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ Path path = Paths.get(fileName);
+ byte[] data = Files.readAllBytes(path);
+ ByteString audioBytes = ByteString.copyFrom(data);
+
+ // Configure request with local raw PCM audio
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speech.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ List<SpeechRecognitionResult> results = response.get().getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s%n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_async]
+
+ // [START speech_transcribe_async_word_time_offsets_gcs]
+ /**
+ * Performs non-blocking speech recognition on a remote FLAC file and prints the transcription
+ * as well as word time offsets.
+ *
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
+ */
+ public static void asyncRecognizeWords(String gcsUri) throws Exception {
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure remote file request for FLAC
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableWordTimeOffsets(true)
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speech.longRunningRecognizeAsync(config, audio);
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ List<SpeechRecognitionResult> results = response.get().getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s\n", alternative.getTranscript());
+ for (WordInfo wordInfo : alternative.getWordsList()) {
+ System.out.println(wordInfo.getWord());
+ System.out.printf(
+ "\t%s.%s sec - %s.%s sec\n",
+ wordInfo.getStartTime().getSeconds(),
+ wordInfo.getStartTime().getNanos() / 100000000,
+ wordInfo.getEndTime().getSeconds(),
+ wordInfo.getEndTime().getNanos() / 100000000);
+ }
+ }
+ }
+ }
+ // [END speech_transcribe_async_word_time_offsets_gcs]
+
+ // [START speech_transcribe_async_gcs]
+ /**
+ * Performs non-blocking speech recognition on a remote FLAC file and prints the transcription.
+ *
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
+ */
+ public static void asyncRecognizeGcs(String gcsUri) throws Exception {
+ // Configure polling algorithm
+ SpeechSettings.Builder speechSettings = SpeechSettings.newBuilder();
+ TimedRetryAlgorithm timedRetryAlgorithm =
+ OperationTimedPollAlgorithm.create(
+ RetrySettings.newBuilder()
+ .setInitialRetryDelay(Duration.ofMillis(500L))
+ .setRetryDelayMultiplier(1.5)
+ .setMaxRetryDelay(Duration.ofMillis(5000L))
+ .setInitialRpcTimeout(Duration.ZERO) // ignored
+ .setRpcTimeoutMultiplier(1.0) // ignored
+ .setMaxRpcTimeout(Duration.ZERO) // ignored
+ .setTotalTimeout(Duration.ofHours(24L)) // set polling timeout to 24 hours
+ .build());
+ speechSettings.longRunningRecognizeOperationSettings().setPollingAlgorithm(timedRetryAlgorithm);
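+ // Long files can take a long time to transcribe; the 24 hour total timeout above keeps
+ // the client polling instead of giving up at the default deadline.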
+
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create(speechSettings.build())) {
+
+ // Configure remote file request for FLAC
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .build();
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speech.longRunningRecognizeAsync(config, audio);
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ List<SpeechRecognitionResult> results = response.get().getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_async_gcs]
+
+ // [START speech_transcribe_streaming]
+ /**
+ * Performs streaming speech recognition on raw PCM audio data.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void streamingRecognizeFile(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] data = Files.readAllBytes(path);
+
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure request with local raw PCM audio
+ RecognitionConfig recConfig =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setModel("default")
+ .build();
+ StreamingRecognitionConfig config =
+ StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
+
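+ // Buffers every streaming response and completes a future when the server closes the
+ // stream, so the transcripts can be collected synchronously below.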
+ class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
+ private final SettableFuture<List<T>> future = SettableFuture.create();
+ private final List<T> messages = new java.util.ArrayList<T>();
+
+ @Override
+ public void onNext(T message) {
+ messages.add(message);
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ future.setException(t);
+ }
+
+ @Override
+ public void onCompleted() {
+ future.set(messages);
+ }
+
+ // Returns the SettableFuture object to get received messages / exceptions.
+ public SettableFuture<List<T>> future() {
+ return future;
+ }
+ }
+
+ ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
+ new ResponseApiStreamingObserver<>();
+
+ BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
+ speech.streamingRecognizeCallable();
+
+ ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
+ callable.bidiStreamingCall(responseObserver);
+
+ // The first request must **only** contain the audio configuration:
+ requestObserver.onNext(
+ StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
+
+ // Subsequent requests must **only** contain the audio data.
+ requestObserver.onNext(
+ StreamingRecognizeRequest.newBuilder()
+ .setAudioContent(ByteString.copyFrom(data))
+ .build());
+
+ // Mark transmission as completed after sending the data.
+ requestObserver.onCompleted();
+
+ List<StreamingRecognizeResponse> responses = responseObserver.future().get();
+
+ for (StreamingRecognizeResponse response : responses) {
+ // For streaming recognize, the results list has one is_final result (if available) followed
+ // by a number of in-progress results (if interim_results is true) for subsequent utterances.
+ // Just print the first result here.
+ StreamingRecognitionResult result = response.getResultsList().get(0);
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_streaming]
+
+ // [START speech_sync_recognize_punctuation]
+ /**
+ * Performs transcription with automatic punctuation on raw PCM audio data.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Configure request with local raw PCM audio
+ RecognitionConfig recConfig =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableAutomaticPunctuation(true)
+ .build();
+
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);
+
+ // Just print the first result here.
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ // [END speech_sync_recognize_punctuation]
+
+ // [START speech_transcribe_auto_punctuation]
+ /**
+ * Performs transcription on a remote FLAC file and prints the result.
+ *
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
+ */
+ public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Configure request for the remote FLAC audio file
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableAutomaticPunctuation(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ // Just print the first result here.
+ SpeechRecognitionResult result = response.get().getResultsList().get(0);
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ // [END speech_transcribe_auto_punctuation]
+
+ // [START speech_stream_recognize_punctuation]
+ /**
+ * Performs streaming speech recognition on raw PCM audio data.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] data = Files.readAllBytes(path);
+
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure request with local raw PCM audio
+ RecognitionConfig recConfig =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableAutomaticPunctuation(true)
+ .build();
+
+ // Build the streaming config with the audio config
+ StreamingRecognitionConfig config =
+ StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
+
+ class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
+ private final SettableFuture<List<T>> future = SettableFuture.create();
+ private final List<T> messages = new java.util.ArrayList<T>();
+
+ @Override
+ public void onNext(T message) {
+ messages.add(message);
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ future.setException(t);
+ }
+
+ @Override
+ public void onCompleted() {
+ future.set(messages);
+ }
+
+ // Returns the SettableFuture object to get received messages / exceptions.
+ public SettableFuture<List<T>> future() {
+ return future;
+ }
+ }
+
+ ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
+ new ResponseApiStreamingObserver<>();
+
+ BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
+ speech.streamingRecognizeCallable();
+
+ ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
+ callable.bidiStreamingCall(responseObserver);
+
+ // The first request must **only** contain the audio configuration:
+ requestObserver.onNext(
+ StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build());
+
+ // Subsequent requests must **only** contain the audio data.
+ requestObserver.onNext(
+ StreamingRecognizeRequest.newBuilder()
+ .setAudioContent(ByteString.copyFrom(data))
+ .build());
+
+ // Mark transmission as completed after sending the data.
+ requestObserver.onCompleted();
+
+ List<StreamingRecognizeResponse> responses = responseObserver.future().get();
+
+ for (StreamingRecognizeResponse response : responses) {
+ // For streaming recognize, the results list has one is_final result (if available) followed
+ // by a number of in-progress results (if interim_results is true) for subsequent utterances.
+ // Just print the first result here.
+ StreamingRecognitionResult result = response.getResultsList().get(0);
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_stream_recognize_punctuation]
+
+ // [START speech_transcribe_streaming_mic]
+ /** Performs microphone streaming speech recognition with a duration of 1 minute. */
+ public static void streamingMicRecognize() throws Exception {
+
+ ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
+ try (SpeechClient client = SpeechClient.create()) {
+
+ responseObserver =
+ new ResponseObserver<StreamingRecognizeResponse>() {
+ ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
+
+ public void onStart(StreamController controller) {}
+
+ public void onResponse(StreamingRecognizeResponse response) {
+ responses.add(response);
+ }
+
+ public void onComplete() {
+ for (StreamingRecognizeResponse response : responses) {
+ StreamingRecognitionResult result = response.getResultsList().get(0);
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+
+ public void onError(Throwable t) {
+ System.out.println(t);
+ }
+ };
+
+ ClientStream<StreamingRecognizeRequest> clientStream =
+ client.streamingRecognizeCallable().splitCall(responseObserver);
+
+ RecognitionConfig recognitionConfig =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .build();
+ StreamingRecognitionConfig streamingRecognitionConfig =
+ StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
+
+ StreamingRecognizeRequest request =
+ StreamingRecognizeRequest.newBuilder()
+ .setStreamingConfig(streamingRecognitionConfig)
+ .build(); // The first request in a streaming call has to be a config
+
+ clientStream.send(request);
+ // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
+ // bigEndian: false
+ AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
+ DataLine.Info targetInfo =
+ new Info(
+ TargetDataLine.class,
+ audioFormat); // Set the system information to read from the microphone audio stream
+
+ if (!AudioSystem.isLineSupported(targetInfo)) {
+ System.out.println("Microphone not supported");
+ System.exit(0);
+ }
+ // Target data line captures the audio stream the microphone produces.
+ TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
+ targetDataLine.open(audioFormat);
+ targetDataLine.start();
+ System.out.println("Start speaking");
+ long startTime = System.currentTimeMillis();
+ // Audio Input Stream
+ AudioInputStream audio = new AudioInputStream(targetDataLine);
+ while (true) {
+ long estimatedTime = System.currentTimeMillis() - startTime;
+ byte[] data = new byte[6400];
+ audio.read(data);
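+ // Note: read() reports how many bytes were actually read; this sample ignores the
+ // count and sends the full buffer each time.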
+ if (estimatedTime > 60000) { // 60 seconds
+ System.out.println("Stop speaking.");
+ targetDataLine.stop();
+ targetDataLine.close();
+ break;
+ }
+ request =
+ StreamingRecognizeRequest.newBuilder()
+ .setAudioContent(ByteString.copyFrom(data))
+ .build();
+ clientStream.send(request);
+ }
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+ responseObserver.onComplete();
+ }
+ // [END speech_transcribe_streaming_mic]
+
+ // [START speech_transcribe_enhanced_model]
+ /**
+ * Transcribe the given audio file using an enhanced model.
+ *
+ * @param fileName the path to an audio file.
+ */
+ public static void transcribeFileWithEnhancedModel(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ // Configure request to enable enhanced models
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setUseEnhanced(true)
+ // A model must be specified to use an enhanced model.
+ .setModel("phone_call")
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript: %s\n\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_enhanced_model]
+
+ // [START speech_transcribe_model_selection]
+ /**
+ * Performs transcription of the given audio file synchronously with the selected model.
+ *
+ * @param fileName the path to an audio file to transcribe
+ */
+ public static void transcribeModelSelection(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speech = SpeechClient.create()) {
+ // Configure request with video media type
+ RecognitionConfig recConfig =
+ RecognitionConfig.newBuilder()
+ // The encoding can be omitted; if specified, it must match the value in the file header
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ // The sample rate can likewise be omitted; if specified, it must match the value in the
+ // file header
+ .setSampleRateHertz(16000)
+ .setModel("video")
+ .build();
+
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);
+ // Just print the first result here.
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ // [END speech_transcribe_model_selection]
+
+ // [START speech_transcribe_model_selection_gcs]
+ /**
+ * Performs transcription of the remote audio file asynchronously with the selected model.
+ *
+ * @param gcsUri the path to the remote audio file to transcribe.
+ */
+ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure request with video media type
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ // The encoding can be omitted; if specified, it must match the value in the file header
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ // The sample rate can likewise be omitted; if specified, it must match the value in the
+ // file header
+ .setSampleRateHertz(16000)
+ .setModel("video")
+ .build();
+
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speech.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ List<SpeechRecognitionResult> results = response.get().getResultsList();
+
+ // Just print the first result here.
+ SpeechRecognitionResult result = results.get(0);
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ // [END speech_transcribe_model_selection_gcs]
+
+ // [START speech_transcribe_multichannel]
+ /**
+ * Transcribes a local audio file with multi-channel recognition.
+ *
+ * @param fileName the path to the local audio file
+ */
+ public static void transcribeMultiChannel(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ // Configure request to enable multiple channels
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(44100)
+ .setAudioChannelCount(2)
+ .setEnableSeparateRecognitionPerChannel(true)
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n", alternative.getTranscript());
+ System.out.printf("Channel Tag : %s\n", result.getChannelTag());
+ }
+ }
+ }
+ // [END speech_transcribe_multichannel]
+
+ // [START speech_transcribe_multichannel_gcs]
+ /**
+ * Transcribes a remote audio file with multi-channel recognition.
+ *
+ * @param gcsUri the path to the remote audio file
+ */
+ public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // Configure request to enable multiple channels
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(44100)
+ .setAudioChannelCount(2)
+ .setEnableSeparateRecognitionPerChannel(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+ // Print out the results
+ for (SpeechRecognitionResult result : response.get().getResultsList()) {
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ System.out.printf("Channel Tag : %s\n", result.getChannelTag());
+ }
+ }
+ }
+ // [END speech_transcribe_multichannel_gcs]
+}
diff --git a/speech/src/main/java/com/example/speech/RecognizeBeta.java b/speech/src/main/java/com/example/speech/RecognizeBeta.java
new file mode 100644
index 00000000000..99544b79eff
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/RecognizeBeta.java
@@ -0,0 +1,532 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
+import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1p1beta1.RecognitionMetadata;
+import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType;
+import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance;
+import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType;
+import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
+import com.google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig;
+import com.google.cloud.speech.v1p1beta1.SpeechClient;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
+import com.google.cloud.speech.v1p1beta1.WordInfo;
+import com.google.protobuf.ByteString;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+
+public class RecognizeBeta {
+
+ /** Run speech recognition tasks. */
+ public static void main(String... args) throws Exception {
+ if (args.length < 1) {
+ System.out.println("Usage:");
+ System.out.printf(
+ "\tjava %s \"\" \"\"\n"
+ + "Commands:\n"
+ + "\t metadata | diarization | multi-channel |\n"
+ + "\t multi-language | word-level-conf\n"
+ + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ + "for a Cloud Storage resource (gs://...)\n",
+ RecognizeBeta.class.getCanonicalName());
+ return;
+ }
+ String command = args[0];
+ String path = args.length > 1 ? args[1] : "";
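+ // For example: "diarization ./resources/commercial_mono.wav" runs speaker diarization on
+ // the bundled sample file.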
+
+ // Use command and GCS path pattern to invoke transcription.
+ if (command.equals("metadata")) {
+ transcribeFileWithMetadata(path);
+ } else if (command.equals("diarization")) {
+ if (path.startsWith("gs://")) {
+ transcribeDiarizationGcs(path);
+ } else {
+ transcribeDiarization(path);
+ }
+ } else if (command.equals("multi-channel")) {
+ if (path.startsWith("gs://")) {
+ transcribeMultiChannelGcs(path);
+ } else {
+ transcribeMultiChannel(path);
+ }
+ } else if (command.equals("multi-language")) {
+ if (path.startsWith("gs://")) {
+ transcribeMultiLanguageGcs(path);
+ } else {
+ transcribeMultiLanguage(path);
+ }
+ } else if (command.equals("word-level-conf")) {
+ if (path.startsWith("gs://")) {
+ transcribeWordLevelConfidenceGcs(path);
+ } else {
+ transcribeWordLevelConfidence(path);
+ }
+ }
+ }
+
+ // [START speech_transcribe_recognition_metadata_beta]
+ /**
+ * Transcribe the given audio file and include recognition metadata in the request.
+ *
+ * @param fileName the path to an audio file.
+ */
+ public static void transcribeFileWithMetadata(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ // Construct a recognition metadata object.
+ // Most metadata fields are specified as enums that can be found
+ // in speech.enums.RecognitionMetadata
+ RecognitionMetadata metadata =
+ RecognitionMetadata.newBuilder()
+ .setInteractionType(InteractionType.DISCUSSION)
+ .setMicrophoneDistance(MicrophoneDistance.NEARFIELD)
+ .setRecordingDeviceType(RecordingDeviceType.SMARTPHONE)
+ .setRecordingDeviceName("Pixel 2 XL") // Some metadata fields are free form strings
+ // And some are integers, for instance the 6 digit NAICS code
+ // https://www.naics.com/search/
+ .setIndustryNaicsCodeOfAudio(519190)
+ .build();
+
+ // Configure request to enable enhanced models
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setMetadata(metadata) // Add the metadata to the config
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript: %s\n\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_recognition_metadata_beta]
+
+ // [START speech_transcribe_diarization_beta]
+ /**
+ * Transcribe the given audio file using speaker diarization.
+ *
+ * @param fileName the path to an audio file.
+ */
+ public static void transcribeDiarization(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
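+      // Bound the number of speakers to detect; this sample assumes a two-person conversation.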
+ SpeakerDiarizationConfig speakerDiarizationConfig =
+ SpeakerDiarizationConfig.newBuilder()
+ .setEnableSpeakerDiarization(true)
+ .setMinSpeakerCount(2)
+ .setMaxSpeakerCount(2)
+ .build();
+
+ // Configure request to enable Speaker diarization
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setDiarizationConfig(speakerDiarizationConfig)
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Speaker Tags are only included in the last result object, which has only one alternative.
+ SpeechRecognitionAlternative alternative =
+ recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
+
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+
+      // Group consecutive words by speaker; once the speaker changes,
+      // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+
+ System.out.println(speakerWords.toString());
+ }
+ }
+ // [END speech_transcribe_diarization_beta]
+
+ // [START speech_transcribe_diarization_gcs_beta]
+ /**
+ * Transcribe a remote audio file using speaker diarization.
+ *
+   * @param gcsUri the GCS URI of the audio file.
+ */
+ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ SpeakerDiarizationConfig speakerDiarizationConfig =
+ SpeakerDiarizationConfig.newBuilder()
+ .setEnableSpeakerDiarization(true)
+ .setMinSpeakerCount(2)
+ .setMaxSpeakerCount(2)
+ .build();
+
+ // Configure request to enable Speaker diarization
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setDiarizationConfig(speakerDiarizationConfig)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+          speechClient.longRunningRecognizeAsync(config, audio);
+
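+      // Poll the operation status every ten seconds until the transcription completes.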
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ // Speaker Tags are only included in the last result object, which has only one alternative.
+ LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
+ SpeechRecognitionAlternative alternative =
+ longRunningRecognizeResponse
+ .getResults(longRunningRecognizeResponse.getResultsCount() - 1)
+ .getAlternatives(0);
+
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+
+      // Group consecutive words by speaker; once the speaker changes,
+      // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+
+ System.out.println(speakerWords.toString());
+ }
+ }
+ // [END speech_transcribe_diarization_gcs_beta]
+
+ // [START speech_transcribe_multichannel_beta]
+ /**
+ * Transcribe a local audio file with multi-channel recognition
+ *
+   * @param fileName the path to the local audio file
+ */
+ public static void transcribeMultiChannel(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ // Configure request to enable multiple channels
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(44100)
+ .setAudioChannelCount(2)
+ .setEnableSeparateRecognitionPerChannel(true)
+ .build();
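+      // With separate recognition per channel, each result carries a channel tag
+      // identifying which audio channel the transcript came from.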
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n", alternative.getTranscript());
+ System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
+ }
+ }
+ }
+ // [END speech_transcribe_multichannel_beta]
+
+ // [START speech_transcribe_multichannel_gcs_beta]
+ /**
+ * Transcribe a remote audio file with multi-channel recognition
+ *
+   * @param gcsUri the GCS URI of the audio file
+ */
+ public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // Configure request to enable multiple channels
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(44100)
+ .setAudioChannelCount(2)
+ .setEnableSeparateRecognitionPerChannel(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+          speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+      // Print out the results
+ for (SpeechRecognitionResult result : response.get().getResultsList()) {
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
+ }
+ }
+ }
+ // [END speech_transcribe_multichannel_gcs_beta]
+
+ // [START speech_transcribe_multilanguage_beta]
+ /**
+ * Transcribe a local audio file with multi-language recognition
+ *
+ * @param fileName the path to the audio file
+ */
+ public static void transcribeMultiLanguage(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ // Get the contents of the local audio file
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
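+      // Alternative languages the audio may contain (the API accepts up to three);
+      // the service transcribes using the most likely of these or the primary code below.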
+      ArrayList<String> languageList = new ArrayList<>();
+ languageList.add("es-ES");
+ languageList.add("en-US");
+
+ // Configure request to enable multiple languages
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("ja-JP")
+ .addAllAlternativeLanguageCodes(languageList)
+ .build();
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_multilanguage_beta]
+
+ // [START speech_transcribe_multilanguage_gcs_beta]
+ /**
+ * Transcribe a remote audio file with multi-language recognition
+ *
+   * @param gcsUri the GCS URI of the remote audio file
+ */
+ public static void transcribeMultiLanguageGcs(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+      ArrayList<String> languageList = new ArrayList<>();
+ languageList.add("es-ES");
+ languageList.add("en-US");
+
+ // Configure request to enable multiple languages
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("ja-JP")
+ .addAllAlternativeLanguageCodes(languageList)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+          speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ for (SpeechRecognitionResult result : response.get().getResultsList()) {
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_multilanguage_gcs_beta]
+
+ // [START speech_transcribe_word_level_confidence_beta]
+ /**
+ * Transcribe a local audio file with word level confidence
+ *
+ * @param fileName the path to the local audio file
+ */
+ public static void transcribeWordLevelConfidence(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+ // Configure request to enable word level confidence
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("en-US")
+ .setEnableWordConfidence(true)
+ .build();
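+      // With word confidence enabled, each word in an alternative carries its own
+      // confidence score in addition to the transcript-level confidence.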
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n", alternative.getTranscript());
+ System.out.format(
+ "First Word and Confidence : %s %s \n",
+ alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence());
+ }
+ }
+ }
+ // [END speech_transcribe_word_level_confidence_beta]
+
+ // [START speech_transcribe_word_level_confidence_gcs_beta]
+ /**
+ * Transcribe a remote audio file with word level confidence
+ *
+   * @param gcsUri the GCS URI of the remote audio file
+ */
+ public static void transcribeWordLevelConfidenceGcs(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // Configure request to enable word level confidence
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setSampleRateHertz(44100)
+ .setLanguageCode("en-US")
+ .setEnableWordConfidence(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+          speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+ // Just print the first result here.
+ SpeechRecognitionResult result = response.get().getResultsList().get(0);
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ System.out.format(
+ "First Word and Confidence : %s %s \n",
+ alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence());
+ }
+ }
+ // [END speech_transcribe_word_level_confidence_gcs_beta]
+}
diff --git a/speech/src/main/java/com/example/speech/SpeechAdaptation.java b/speech/src/main/java/com/example/speech/SpeechAdaptation.java
new file mode 100644
index 00000000000..4c51672d134
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/SpeechAdaptation.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_adaptation_beta]
+import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.RecognizeRequest;
+import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
+import com.google.cloud.speech.v1p1beta1.SpeechClient;
+import com.google.cloud.speech.v1p1beta1.SpeechContext;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
+import java.io.IOException;
+
+public class SpeechAdaptation {
+
+ public void speechAdaptation() throws IOException {
+ String uriPath = "gs://cloud-samples-data/speech/brooklyn_bridge.mp3";
+ speechAdaptation(uriPath);
+ }
+
+ public static void speechAdaptation(String uriPath) throws IOException {
+ // Initialize client that will be used to send requests. This client only needs to be created
+ // once, and can be reused for multiple requests. After completing all of your requests, call
+ // the "close" method on the client to safely clean up any remaining background resources.
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // Provides "hints" to the speech recognizer to favor specific words and phrases in the
+ // results.
+ // https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1p1beta1#google.cloud.speech.v1p1beta1.SpeechContext
+ SpeechContext speechContext =
+ SpeechContext.newBuilder().addPhrases("Brooklyn Bridge").setBoost(20.0F).build();
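+      // Boost must be non-negative; higher values bias recognition more strongly
+      // toward the supplied phrases.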
+ // Configure recognition config to match your audio file.
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.MP3)
+ .setSampleRateHertz(44100)
+ .setLanguageCode("en-US")
+ .addSpeechContexts(speechContext)
+ .build();
+ // Set the path to your audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(uriPath).build();
+
+      // Build the request
+ RecognizeRequest request =
+ RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build();
+
+ // Display the results
+ RecognizeResponse response = speechClient.recognize(request);
+ for (SpeechRecognitionResult result : response.getResultsList()) {
+ // First alternative is the most probable result
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript: %s\n", alternative.getTranscript());
+ }
+ }
+ }
+}
+// [END speech_adaptation_beta]
diff --git a/speech/src/main/java/com/example/speech/SpeechProfanityFilter.java b/speech/src/main/java/com/example/speech/SpeechProfanityFilter.java
new file mode 100644
index 00000000000..b8ee99215d5
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/SpeechProfanityFilter.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_with_profanity_filter_gcs]
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.SpeechRecognitionResult;
+import java.util.List;
+
+public class SpeechProfanityFilter {
+
+ public void speechProfanityFilter() throws Exception {
+ String uriPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
+ speechProfanityFilter(uriPath);
+ }
+
+ /**
+   * Transcribe a remote audio file with the profanity filter enabled
+   *
+   * @param gcsUri the GCS URI of the audio file
+ */
+ public static void speechProfanityFilter(String gcsUri) throws Exception {
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure remote file request
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setProfanityFilter(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use blocking call to get audio transcript
+ RecognizeResponse response = speech.recognize(config, audio);
+      List<SpeechRecognitionResult> results = response.getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcription: %s\n", alternative.getTranscript());
+ }
+ }
+ }
+}
+// [END speech_transcribe_with_profanity_filter_gcs]
diff --git a/speech/src/main/java/com/example/speech/TranscribeDiarization.java b/speech/src/main/java/com/example/speech/TranscribeDiarization.java
new file mode 100644
index 00000000000..6778f4c5907
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/TranscribeDiarization.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_diarization]
+
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.WordInfo;
+import com.google.protobuf.ByteString;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+class TranscribeDiarization {
+
+ static void transcribeDiarization() throws IOException {
+ // TODO(developer): Replace these variables before running the sample.
+ String fileName = "resources/commercial_mono.wav";
+ transcribeDiarization(fileName);
+ }
+
+ // Transcribe the given audio file using speaker diarization.
+ static void transcribeDiarization(String fileName) throws IOException {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ // Initialize client that will be used to send requests. This client only needs to be created
+ // once, and can be reused for multiple requests. After completing all of your requests, call
+ // the "close" method on the client to safely clean up any remaining background resources.
+ try (SpeechClient client = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+ SpeakerDiarizationConfig speakerDiarizationConfig =
+ SpeakerDiarizationConfig.newBuilder()
+ .setEnableSpeakerDiarization(true)
+ .setMinSpeakerCount(2)
+ .setMaxSpeakerCount(2)
+ .build();
+ // Configure request to enable Speaker diarization
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setDiarizationConfig(speakerDiarizationConfig)
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio);
+
+ // Speaker Tags are only included in the last result object, which has only one alternative.
+ SpeechRecognitionAlternative alternative =
+ recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+      // Group consecutive words by speaker; once the speaker changes,
+      // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+ System.out.println(speakerWords.toString());
+ }
+ }
+}
+// [END speech_transcribe_diarization]
diff --git a/speech/src/main/java/com/example/speech/TranscribeDiarizationGcs.java b/speech/src/main/java/com/example/speech/TranscribeDiarizationGcs.java
new file mode 100644
index 00000000000..de7245b9a21
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/TranscribeDiarizationGcs.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_diarization_gcs]
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
+import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.WordInfo;
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+
+public class TranscribeDiarizationGcs {
+
+ static void transcribeDiarizationGcs()
+ throws IOException, ExecutionException, InterruptedException {
+ // TODO(developer): Replace these variables before running the sample.
+ String gcsUri = "gs://cloud-samples-data/speech/commercial_mono.wav";
+ transcribeDiarizationGcs(gcsUri);
+ }
+
+  // Transcribe the given GCS file using speaker diarization
+ public static void transcribeDiarizationGcs(String gcsUri)
+ throws IOException, ExecutionException, InterruptedException {
+ // Initialize client that will be used to send requests. This client only needs to be created
+ // once, and can be reused for multiple requests. After completing all of your requests, call
+ // the "close" method on the client to safely clean up any remaining background resources.
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ SpeakerDiarizationConfig speakerDiarizationConfig =
+ SpeakerDiarizationConfig.newBuilder()
+ .setEnableSpeakerDiarization(true)
+ .setMinSpeakerCount(2)
+ .setMaxSpeakerCount(2)
+ .build();
+ // Configure request to enable Speaker diarization
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setDiarizationConfig(speakerDiarizationConfig)
+ .build();
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
+          speechClient.longRunningRecognizeAsync(config, audio);
+ System.out.println("Waiting for response...");
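+      // future.get() below blocks until the long-running operation completes.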
+
+ // Speaker Tags are only included in the last result object, which has only one alternative.
+ LongRunningRecognizeResponse response = future.get();
+ SpeechRecognitionAlternative alternative =
+ response.getResults(response.getResultsCount() - 1).getAlternatives(0);
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+      // Group consecutive words by speaker; once the speaker changes,
+      // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+ System.out.println(speakerWords.toString());
+ }
+ }
+}
+// [END speech_transcribe_diarization_gcs]
diff --git a/speech/src/test/java/com/example/speech/QuickstartSampleIT.java b/speech/src/test/java/com/example/speech/QuickstartSampleIT.java
new file mode 100644
index 00000000000..ed739930161
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/QuickstartSampleIT.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for quickstart sample. */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class QuickstartSampleIT {
+ private ByteArrayOutputStream bout;
+  private PrintStream out;
+  private PrintStream stdout;
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+    stdout = System.out;
+    System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+    System.setOut(stdout);
+ }
+
+ @Test
+ public void testQuickstart() throws Exception {
+ // Act
+ QuickstartSample.main();
+
+ // Assert
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+}
diff --git a/speech/src/test/java/com/example/speech/RecognizeBetaIT.java b/speech/src/test/java/com/example/speech/RecognizeBetaIT.java
new file mode 100644
index 00000000000..17fe91f1f12
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/RecognizeBetaIT.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for speech recognize sample. */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class RecognizeBetaIT {
+ private static final String BUCKET = "cloud-samples-data";
+
+ private ByteArrayOutputStream bout;
+  private PrintStream out;
+  private PrintStream stdout;
+
+ // The path to the audio file to transcribe
+ private String audioFileName = "./resources/audio.raw";
+ private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
+ private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
+ private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn_bridge.flac";
+ private String gcsDiarizationAudioPath = "gs://" + BUCKET + "/speech/commercial_mono.wav";
+
+ // The path to the video file to transcribe
+ private String videoFileName = "./resources/Google_Gnome.wav";
+ private String gcsVideoPath = "gs://" + BUCKET + "/speech/Google_Gnome.wav";
+
+ private String recognitionAudioFile = "./resources/commercial_mono.wav";
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+    stdout = System.out;
+    System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+    System.setOut(stdout);
+ }
+
+ @Test
+ public void testMetadata() throws Exception {
+ RecognizeBeta.transcribeFileWithMetadata(recognitionAudioFile);
+ String got = bout.toString();
+ assertThat(got).contains("Chrome");
+ }
+
+ @Test
+ public void testTranscribeDiarization() throws Exception {
+ RecognizeBeta.transcribeDiarization(recognitionAudioFile);
+ String got = bout.toString();
+    // Diarization (a beta feature) can be flaky, so this test only checks that output is present
+ assertThat(got).contains("Speaker");
+ }
+
+ @Test
+ public void testTranscribeDiarizationGcs() throws Exception {
+ RecognizeBeta.transcribeDiarizationGcs(gcsDiarizationAudioPath);
+ String got = bout.toString();
+    // Diarization (a beta feature) can be flaky, so this test only checks that output is present
+ assertThat(got).contains("Speaker");
+ }
+
+ @Test
+ public void testTranscribeMultiChannel() throws Exception {
+ RecognizeBeta.transcribeMultiChannel(multiChannelAudioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("Channel Tag : 1");
+ }
+
+ @Test
+ public void testTranscribeMultiChannelGcs() throws Exception {
+ RecognizeBeta.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("Channel Tag : 1");
+ }
+
+ @Test
+ public void testTranscribeMultiLanguage() throws Exception {
+ RecognizeBeta.transcribeMultiLanguage(videoFileName);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript : OK Google");
+ }
+
+ @Test
+ public void testTranscribeMultiLanguageGcs() throws Exception {
+ RecognizeBeta.transcribeMultiLanguageGcs(gcsVideoPath);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript : OK Google");
+ }
+
+ @Test
+ public void testTranscribeWordLevelConfidence() throws Exception {
+ RecognizeBeta.transcribeWordLevelConfidence(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript : how old is the Brooklyn Bridge");
+ assertThat(got).contains("First Word and Confidence : how");
+ }
+
+ @Test
+ public void testTranscribeWordLevelConfidenceGcs() throws Exception {
+ RecognizeBeta.transcribeWordLevelConfidenceGcs(gcsAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript : how old is the Brooklyn Bridge");
+ assertThat(got).contains("First Word and Confidence : how");
+ }
+}
diff --git a/speech/src/test/java/com/example/speech/RecognizeIT.java b/speech/src/test/java/com/example/speech/RecognizeIT.java
new file mode 100644
index 00000000000..2de1b0a1b45
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/RecognizeIT.java
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for speech recognize sample. */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class RecognizeIT {
+ private static final String BUCKET = "cloud-samples-tests";
+
+ private ByteArrayOutputStream bout;
+  private PrintStream out;
+  private PrintStream stdout;
+
+ // The path to the audio file to transcribe
+ private String audioFileName = "./resources/audio.raw";
+ private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
+ private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
+ private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
+
+ private String recognitionAudioFile = "./resources/commercial_mono.wav";
+
+ // The path to the video file to transcribe
+ private String videoFileName = "./resources/Google_Gnome.wav";
+ private String gcsVideoPath = "gs://" + BUCKET + "/speech/Google_Gnome.wav";
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+    stdout = System.out;
+    System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+    System.setOut(stdout);
+ }
+
+ @Test
+ public void testRecognizeFile() throws Exception {
+ Recognize.syncRecognizeFile(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+
+ @Test
+ public void testRecognizeWordoffset() throws Exception {
+ Recognize.syncRecognizeWords(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ assertThat(got).contains("\t0.0 sec -");
+ }
+
+ @Test
+ public void testRecognizeGcs() throws Exception {
+ Recognize.syncRecognizeGcs(gcsAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+
+ @Test
+ public void testAsyncRecognizeFile() throws Exception {
+ Recognize.asyncRecognizeFile(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+
+ @Test
+ public void testAsyncRecognizeGcs() throws Exception {
+ Recognize.asyncRecognizeGcs(gcsAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+
+ @Test
+ public void testAsyncWordoffset() throws Exception {
+ Recognize.asyncRecognizeWords(gcsAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ assertThat(got).contains("\t0.0 sec -");
+ }
+
+ @Test
+ public void testStreamRecognize() throws Exception {
+ Recognize.streamingRecognizeFile(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+
+ @Test
+ public void testAutoPunctuation() throws Exception {
+ Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript");
+ }
+
+ @Test
+ public void testGcsAutoPunctuation() throws Exception {
+ Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript");
+ }
+
+ @Test
+ public void testStreamAutoPunctuation() throws Exception {
+ Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript");
+ }
+
+ @Test
+ public void testEnhancedModel() throws Exception {
+ Recognize.transcribeFileWithEnhancedModel(recognitionAudioFile);
+ String got = bout.toString();
+ assertThat(got).contains("Chrome");
+ }
+
+ @Test
+ public void testModelSelection() throws Exception {
+ Recognize.transcribeModelSelection(videoFileName);
+ String got = bout.toString();
+ assertThat(got).contains("OK Google");
+ assertThat(got).contains("the weather outside is sunny");
+ }
+
+ @Test
+ public void testGcsModelSelection() throws Exception {
+ Recognize.transcribeModelSelectionGcs(gcsVideoPath);
+ String got = bout.toString();
+ assertThat(got).contains("OK Google");
+ assertThat(got).contains("the weather outside is sunny");
+ }
+
+ @Test
+ public void testTranscribeMultiChannel() throws Exception {
+ Recognize.transcribeMultiChannel(multiChannelAudioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("Channel Tag : 1");
+ }
+
+ @Test
+ public void testTranscribeMultiChannelGcs() throws Exception {
+ Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("Channel Tag : 1");
+ }
+}
diff --git a/speech/src/test/java/com/example/speech/SpeechAdaptationTest.java b/speech/src/test/java/com/example/speech/SpeechAdaptationTest.java
new file mode 100644
index 00000000000..a31b3637d5d
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/SpeechAdaptationTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class SpeechAdaptationTest {
+ private static final String AUDIO_FILE = "gs://cloud-samples-data/speech/brooklyn_bridge.mp3";
+ private ByteArrayOutputStream bout;
+  private PrintStream out;
+  private PrintStream stdout;
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+    stdout = System.out;
+    System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+    System.setOut(stdout);
+ }
+
+ @Test
+ public void testTranscribeContextClasses() throws IOException {
+ SpeechAdaptation.speechAdaptation(AUDIO_FILE);
+ String got = bout.toString();
+ assertThat(got).contains("Transcript:");
+ }
+}
diff --git a/speech/src/test/java/com/example/speech/SpeechProfanityFilterTest.java b/speech/src/test/java/com/example/speech/SpeechProfanityFilterTest.java
new file mode 100644
index 00000000000..ddf1ccaeecb
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/SpeechProfanityFilterTest.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class SpeechProfanityFilterTest {
+ private static final String AUDIO_FILE = "gs://cloud-samples-tests/speech/brooklyn.flac";
+ private ByteArrayOutputStream bout;
+ private PrintStream stdout;
+ private PrintStream out;
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+ stdout = System.out;
+ System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+ System.setOut(stdout);
+ }
+
+ @Test
+ public void testSpeechProfanityFilter() throws Exception {
+ SpeechProfanityFilter.speechProfanityFilter(AUDIO_FILE);
+ String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
+ }
+}
diff --git a/speech/src/test/java/com/example/speech/TranscribeDiarizationIT.java b/speech/src/test/java/com/example/speech/TranscribeDiarizationIT.java
new file mode 100644
index 00000000000..ce69cdd2286
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/TranscribeDiarizationIT.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+import static junit.framework.TestCase.assertNotNull;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.concurrent.ExecutionException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+// Tests for speech Transcribe Diarization samples.
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class TranscribeDiarizationIT {
+ private ByteArrayOutputStream bout;
+  private PrintStream out;
+  private PrintStream stdout;
+
+ // The path to the audio file to transcribe
+ private String recognitionAudioFile = "./resources/commercial_mono.wav";
+
+ private static void requireEnvVar(String varName) {
+    assertNotNull(
+        String.format("Environment variable '%s' is required to perform these tests.", varName),
+        System.getenv(varName));
+ }
+
+ @BeforeClass
+ public static void checkRequirements() {
+ requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS");
+ }
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+    stdout = System.out;
+    System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+    System.setOut(stdout);
+ }
+
+ @Test
+ public void testDiarization() throws IOException {
+ TranscribeDiarization.transcribeDiarization(recognitionAudioFile);
+ String got = bout.toString();
+ assertThat(got).contains("Speaker");
+ }
+
+ @Test
+ public void testDiarizationGcs() throws IOException, ExecutionException, InterruptedException {
+ TranscribeDiarizationGcs.transcribeDiarizationGcs(
+ "gs://cloud-samples-data/speech/commercial_mono.wav");
+ String got = bout.toString();
+ assertThat(got).contains("Speaker");
+ }
+}