diff --git a/speech/README.md b/speech/README.md
deleted file mode 100644
index 635274c71cd..00000000000
--- a/speech/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Cloud Speech API samples for Java
-
-These samples have moved to [googleapis/java-speech](https://github.com/googleapis/java-speech/tree/main/samples).
\ No newline at end of file
diff --git a/speech/pom.xml b/speech/pom.xml
new file mode 100644
index 00000000000..085ac51000c
--- /dev/null
+++ b/speech/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.example.speech</groupId>
+  <artifactId>google-cloud-speech-snippets</artifactId>
+  <packaging>jar</packaging>
+  <name>Google Cloud Speech Snippets</name>
+  <url>https://github.com/GoogleCloudPlatform/java-docs-samples/tree/main/speech</url>
+
+  <parent>
+    <groupId>com.google.cloud.samples</groupId>
+    <artifactId>shared-configuration</artifactId>
+    <version>1.2.0</version>
+  </parent>
+
+  <properties>
+    <maven.compiler.source>1.8</maven.compiler.source>
+    <maven.compiler.target>1.8</maven.compiler.target>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>com.google.cloud</groupId>
+        <artifactId>libraries-bom</artifactId>
+        <version>26.1.3</version>
+        <type>pom</type>
+        <scope>import</scope>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.json</groupId>
+      <artifactId>json</artifactId>
+      <version>20220924</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-speech</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-storage</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.5.0</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.13.2</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.truth</groupId>
+      <artifactId>truth</artifactId>
+      <version>1.1.3</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/speech/resources/Google_Gnome.wav b/speech/resources/Google_Gnome.wav
new file mode 100644
index 00000000000..2f497b7fbe7
Binary files /dev/null and b/speech/resources/Google_Gnome.wav differ
diff --git a/speech/resources/audio.raw b/speech/resources/audio.raw
new file mode 100644
index 00000000000..5ebf79d3c9c
Binary files /dev/null and b/speech/resources/audio.raw differ
diff --git a/speech/resources/commercial_mono.wav b/speech/resources/commercial_mono.wav
new file mode 100644
index 00000000000..e6b9ed434f9
Binary files /dev/null and b/speech/resources/commercial_mono.wav differ
diff --git a/speech/resources/commercial_stereo.wav b/speech/resources/commercial_stereo.wav
new file mode 100644
index 00000000000..467f3687702
Binary files /dev/null and b/speech/resources/commercial_stereo.wav differ
diff --git a/speech/src/main/java/com/example/speech/InfiniteStreamRecognize.java b/speech/src/main/java/com/example/speech/InfiniteStreamRecognize.java
new file mode 100644
index 00000000000..1695f08e63a
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/InfiniteStreamRecognize.java
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2018 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_infinite_streaming]
+
+import com.google.api.gax.rpc.ClientStream;
+import com.google.api.gax.rpc.ResponseObserver;
+import com.google.api.gax.rpc.StreamController;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.SpeechClient;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Duration;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.DataLine;
+import javax.sound.sampled.DataLine.Info;
+import javax.sound.sampled.TargetDataLine;
+
+public class InfiniteStreamRecognize {
+
+  private static final int STREAMING_LIMIT = 290000; // ~5 minutes
+
+  public static final String RED = "\033[0;31m";
+  public static final String GREEN = "\033[0;32m";
+  public static final String YELLOW = "\033[0;33m";
+
+  // Queue shared between the microphone capture thread and the request loop
+  private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue<byte[]>();
+  private static TargetDataLine targetDataLine;
+  private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
+
+  private static int restartCounter = 0;
+  private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
+  private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
+  private static int resultEndTimeInMS = 0;
+  private static int isFinalEndTime = 0;
+  private static int finalRequestEndTime = 0;
+  private static boolean newStream = true;
+  private static double bridgingOffset = 0;
+  private static boolean lastTranscriptWasFinal = false;
+  private static StreamController referenceToStreamController;
+  private static ByteString tempByteString;
+
+  public static void main(String... args) {
+    InfiniteStreamRecognizeOptions options = InfiniteStreamRecognizeOptions.fromFlags(args);
+    if (options == null) {
+      // Could not parse.
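+      // (A sketch of the expected input: the only flag InfiniteStreamRecognizeOptions
+      // defines is --lang_code, e.g. --lang_code=en-US; anything unparsable lands here.)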
+      System.out.println("Failed to parse options.");
+      System.exit(1);
+    }
+
+    try {
+      infiniteStreamingRecognize(options.langCode);
+    } catch (Exception e) {
+      System.out.println("Exception caught: " + e);
+    }
+  }
+
+  public static String convertMillisToDate(double milliSeconds) {
+    long millis = (long) milliSeconds;
+    DecimalFormat format = new DecimalFormat();
+    format.setMinimumIntegerDigits(2);
+    return String.format(
+        "%s:%s /",
+        format.format(TimeUnit.MILLISECONDS.toMinutes(millis)),
+        format.format(
+            TimeUnit.MILLISECONDS.toSeconds(millis)
+                - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis))));
+  }
+
+  /** Performs infinite streaming speech recognition */
+  public static void infiniteStreamingRecognize(String languageCode) throws Exception {
+
+    // Microphone Input buffering
+    class MicBuffer implements Runnable {
+
+      @Override
+      public void run() {
+        System.out.println(YELLOW);
+        System.out.println("Start speaking...Press Ctrl-C to stop");
+        targetDataLine.start();
+        byte[] data = new byte[BYTES_PER_BUFFER];
+        while (targetDataLine.isOpen()) {
+          try {
+            int numBytesRead = targetDataLine.read(data, 0, data.length);
+            if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
+              continue;
+            }
+            sharedQueue.put(data.clone());
+          } catch (InterruptedException e) {
+            System.out.println("Microphone input buffering interrupted : " + e.getMessage());
+          }
+        }
+      }
+    }
+
+    // Creating microphone input buffer thread
+    MicBuffer micrunnable = new MicBuffer();
+    Thread micThread = new Thread(micrunnable);
+    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
+    try (SpeechClient client = SpeechClient.create()) {
+      ClientStream<StreamingRecognizeRequest> clientStream;
+      responseObserver =
+          new ResponseObserver<StreamingRecognizeResponse>() {
+
+            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
+
+            public void onStart(StreamController controller) {
+              referenceToStreamController = controller;
+            }
+
+            public void onResponse(StreamingRecognizeResponse response) {
+              responses.add(response);
+              StreamingRecognitionResult result = response.getResultsList().get(0);
+              Duration resultEndTime = result.getResultEndTime();
+              resultEndTimeInMS =
+                  (int)
+                      ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
+              double correctedTime =
+                  resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
+
+              SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+              if (result.getIsFinal()) {
+                System.out.print(GREEN);
+                System.out.print("\033[2K\r");
+                System.out.printf(
+                    "%s: %s [confidence: %.2f]\n",
+                    convertMillisToDate(correctedTime),
+                    alternative.getTranscript(),
+                    alternative.getConfidence());
+                isFinalEndTime = resultEndTimeInMS;
+                lastTranscriptWasFinal = true;
+              } else {
+                System.out.print(RED);
+                System.out.print("\033[2K\r");
+                System.out.printf(
+                    "%s: %s", convertMillisToDate(correctedTime), alternative.getTranscript());
+                lastTranscriptWasFinal = false;
+              }
+            }
+
+            public void onComplete() {}
+
+            public void onError(Throwable t) {}
+          };
+      clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
+
+      RecognitionConfig recognitionConfig =
+          RecognitionConfig.newBuilder()
+              .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+              .setLanguageCode(languageCode)
+              .setSampleRateHertz(16000)
+              .build();
+
+      StreamingRecognitionConfig streamingRecognitionConfig =
+          StreamingRecognitionConfig.newBuilder()
+              .setConfig(recognitionConfig)
+              .setInterimResults(true)
+              .build();
+
+      StreamingRecognizeRequest request =
+          StreamingRecognizeRequest.newBuilder()
+              .setStreamingConfig(streamingRecognitionConfig)
+              .build(); // The first request in a streaming call has to be a config
+
+      clientStream.send(request);
+
+      try {
+        // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
+        // bigEndian: false
+        AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
+        DataLine.Info targetInfo =
+            new Info(
+                TargetDataLine.class,
+                audioFormat); // Set the system information to read from the microphone audio
+        // stream
+
+        if (!AudioSystem.isLineSupported(targetInfo)) {
+          System.out.println("Microphone not supported");
+          System.exit(0);
+        }
+        // Target data line captures the audio stream the microphone produces.
+        targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
+        targetDataLine.open(audioFormat);
+        micThread.start();
+
+        long startTime = System.currentTimeMillis();
+
+        while (true) {
+
+          long estimatedTime = System.currentTimeMillis() - startTime;
+
+          if (estimatedTime >= STREAMING_LIMIT) {
+
+            clientStream.closeSend();
+            referenceToStreamController.cancel(); // remove Observer
+
+            if (resultEndTimeInMS > 0) {
+              finalRequestEndTime = isFinalEndTime;
+            }
+            resultEndTimeInMS = 0;
+
+            lastAudioInput = null;
+            lastAudioInput = audioInput;
+            audioInput = new ArrayList<ByteString>();
+
+            restartCounter++;
+
+            if (!lastTranscriptWasFinal) {
+              System.out.print('\n');
+            }
+
+            newStream = true;
+
+            clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
+
+            request =
+                StreamingRecognizeRequest.newBuilder()
+                    .setStreamingConfig(streamingRecognitionConfig)
+                    .build();
+
+            System.out.println(YELLOW);
+            System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
+
+            startTime = System.currentTimeMillis();
+
+          } else {
+
+            if ((newStream) && (lastAudioInput.size() > 0)) {
+              // if this is the first audio from a new request
+              // calculate amount of unfinalized audio from last request
+              // resend the audio to the speech client before incoming audio
+              double chunkTime = (double) STREAMING_LIMIT / lastAudioInput.size();
+              // ms length of each chunk in previous request audio arrayList
+              if (chunkTime != 0) {
+                if (bridgingOffset < 0) {
+                  // bridging Offset accounts for time of resent audio
+                  // calculated from last request
+                  bridgingOffset = 0;
+                }
+                if (bridgingOffset > finalRequestEndTime) {
+                  bridgingOffset = finalRequestEndTime;
+                }
+                int chunksFromMs =
+                    (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
+                // chunks from MS is number of chunks to resend
+                bridgingOffset =
+                    (int) Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
+                // set bridging offset for next request
+                for (int i = chunksFromMs; i < lastAudioInput.size(); i++) {
+                  request =
+                      StreamingRecognizeRequest.newBuilder()
+                          .setAudioContent(lastAudioInput.get(i))
+                          .build();
+                  clientStream.send(request);
+                }
+              }
+              newStream = false;
+            }
+
+            tempByteString = ByteString.copyFrom(sharedQueue.take());
+
+            request =
+                StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
+
+            audioInput.add(tempByteString);
+          }
+
+          clientStream.send(request);
+        }
+      } catch (Exception e) {
+        System.out.println(e);
+      }
+    }
+  }
+}
+// [END speech_transcribe_infinite_streaming]
diff --git a/speech/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java b/speech/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java
new file mode 100644
index 00000000000..909ff2be08c
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java
@@ -0,0
+1,55 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class InfiniteStreamRecognizeOptions { + String langCode = "en-US"; // by default english US + + /** Construct an InfiniteStreamRecognizeOptions class from command line flags. */ + public static InfiniteStreamRecognizeOptions fromFlags(String[] args) { + Options options = new Options(); + options.addOption( + Option.builder() + .type(String.class) + .longOpt("lang_code") + .hasArg() + .desc("Language code") + .build()); + + CommandLineParser parser = new DefaultParser(); + CommandLine commandLine; + try { + commandLine = parser.parse(options, args); + InfiniteStreamRecognizeOptions res = new InfiniteStreamRecognizeOptions(); + + if (commandLine.hasOption("lang_code")) { + res.langCode = commandLine.getOptionValue("lang_code"); + } + return res; + } catch (ParseException e) { + System.err.println(e.getMessage()); + return null; + } + } +} diff --git a/speech/src/main/java/com/example/speech/QuickstartSample.java b/speech/src/main/java/com/example/speech/QuickstartSample.java new file mode 100644 index 00000000000..245d0d0b812 --- /dev/null +++ b/speech/src/main/java/com/example/speech/QuickstartSample.java @@ -0,0 +1,62 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +// [START speech_quickstart] +// Imports the Google Cloud client library +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import java.util.List; + +public class QuickstartSample { + + /** Demonstrates using the Speech API to transcribe an audio file. */ + public static void main(String... 
args) throws Exception { + // Instantiates a client + try (SpeechClient speechClient = SpeechClient.create()) { + + // The path to the audio file to transcribe + String gcsUri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"; + + // Builds the sync recognize request + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setSampleRateHertz(16000) + .setLanguageCode("en-US") + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Performs speech recognition on the audio file + RecognizeResponse response = speechClient.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + } +} +// [END speech_quickstart] diff --git a/speech/src/main/java/com/example/speech/Recognize.java b/speech/src/main/java/com/example/speech/Recognize.java new file mode 100644 index 00000000000..ab060e43ea0 --- /dev/null +++ b/speech/src/main/java/com/example/speech/Recognize.java @@ -0,0 +1,941 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.speech; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.api.gax.longrunning.OperationTimedPollAlgorithm; +import com.google.api.gax.retrying.RetrySettings; +import com.google.api.gax.retrying.TimedRetryAlgorithm; +import com.google.api.gax.rpc.ApiStreamObserver; +import com.google.api.gax.rpc.BidiStreamingCallable; +import com.google.api.gax.rpc.ClientStream; +import com.google.api.gax.rpc.ResponseObserver; +import com.google.api.gax.rpc.StreamController; +import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; +import com.google.cloud.speech.v1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.cloud.speech.v1.SpeechSettings; +import com.google.cloud.speech.v1.StreamingRecognitionConfig; +import com.google.cloud.speech.v1.StreamingRecognitionResult; +import com.google.cloud.speech.v1.StreamingRecognizeRequest; +import com.google.cloud.speech.v1.StreamingRecognizeResponse; +import com.google.cloud.speech.v1.WordInfo; +import com.google.common.util.concurrent.SettableFuture; +import com.google.protobuf.ByteString; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioInputStream; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.DataLine; +import javax.sound.sampled.DataLine.Info; +import javax.sound.sampled.TargetDataLine; +import org.threeten.bp.Duration; + +public class Recognize { + + /** Run speech recognition tasks. */ + public static void main(String... args) throws Exception { + if (args.length < 1) { + System.out.println("Usage:"); + System.out.printf( + "\tjava %s \"\" \"\"\n" + + "Commands:\n" + + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n" + + "\t| wordoffsets | auto-punctuation | stream-punctuation \n" + + "\t| enhanced-model | model-selection | multi-channel\n" + + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI " + + "for a Cloud Storage resource (gs://...)\n", + Recognize.class.getCanonicalName()); + return; + } + String command = args[0]; + String path = args.length > 1 ? args[1] : ""; + + // Use command and GCS path pattern to invoke transcription. 
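+    // For example, a hypothetical Maven invocation (exact flags depend on your build setup):
+    //   mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
+    //       -Dexec.args="syncrecognize ./resources/audio.raw"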
+ if (command.equals("syncrecognize")) { + if (path.startsWith("gs://")) { + syncRecognizeGcs(path); + } else { + syncRecognizeFile(path); + } + } else if (command.equals("wordoffsets")) { + if (path.startsWith("gs://")) { + asyncRecognizeWords(path); + } else { + syncRecognizeWords(path); + } + } else if (command.equals("asyncrecognize")) { + if (path.startsWith("gs://")) { + asyncRecognizeGcs(path); + } else { + asyncRecognizeFile(path); + } + } else if (command.equals("streamrecognize")) { + streamingRecognizeFile(path); + } else if (command.equals("micstreamrecognize")) { + streamingMicRecognize(); + } else if (command.equals("auto-punctuation")) { + if (path.startsWith("gs://")) { + transcribeGcsWithAutomaticPunctuation(path); + } else { + transcribeFileWithAutomaticPunctuation(path); + } + } else if (command.equals("stream-punctuation")) { + streamingTranscribeWithAutomaticPunctuation(path); + } else if (command.equals("enhanced-model")) { + transcribeFileWithEnhancedModel(path); + } else if (command.equals("model-selection")) { + if (path.startsWith("gs://")) { + transcribeModelSelectionGcs(path); + } else { + transcribeModelSelection(path); + } + } else if (command.equals("multi-channel")) { + if (path.startsWith("gs://")) { + transcribeMultiChannelGcs(path); + } else { + transcribeMultiChannel(path); + } + } + } + + // [START speech_transcribe_sync] + /** + * Performs speech recognition on raw PCM audio and prints the transcription. + * + * @param fileName the path to a PCM audio file to transcribe. + */ + public static void syncRecognizeFile(String fileName) throws Exception { + try (SpeechClient speech = SpeechClient.create()) { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + // Configure request with local raw PCM audio + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build(); + + // Use blocking call to get audio transcript + RecognizeResponse response = speech.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_sync] + + /** + * Performs sync recognize and prints word time offsets. + * + * @param fileName the path to a PCM audio file to transcribe get offsets on. 
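+   * <p>Time offsets are requested via {@code setEnableWordTimeOffsets(true)} below, which
+   * makes each returned {@code WordInfo} carry start and end timestamps for its word.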
+ */ + public static void syncRecognizeWords(String fileName) throws Exception { + try (SpeechClient speech = SpeechClient.create()) { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + // Configure request with local raw PCM audio + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableWordTimeOffsets(true) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build(); + + // Use blocking call to get audio transcript + RecognizeResponse response = speech.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + for (WordInfo wordInfo : alternative.getWordsList()) { + System.out.println(wordInfo.getWord()); + System.out.printf( + "\t%s.%s sec - %s.%s sec\n", + wordInfo.getStartTime().getSeconds(), + wordInfo.getStartTime().getNanos() / 100000000, + wordInfo.getEndTime().getSeconds(), + wordInfo.getEndTime().getNanos() / 100000000); + } + } + } + } + + // [START speech_transcribe_sync_gcs] + /** + * Performs speech recognition on remote FLAC file and prints the transcription. + * + * @param gcsUri the path to the remote FLAC audio file to transcribe. + */ + public static void syncRecognizeGcs(String gcsUri) throws Exception { + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + // Builds the request for remote FLAC file + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use blocking call for getting audio transcript + RecognizeResponse response = speech.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_sync_gcs] + + // [START speech_transcribe_async] + /** + * Performs non-blocking speech recognition on raw PCM audio and prints the transcription. Note + * that transcription is limited to 60 seconds audio. + * + * @param fileName the path to a PCM audio file to transcribe. 
+ */ + public static void asyncRecognizeFile(String fileName) throws Exception { + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + // Configure request with local raw PCM audio + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + + List results = response.get().getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_async] + + // [START speech_transcribe_async_word_time_offsets_gcs] + /** + * Performs non-blocking speech recognition on remote FLAC file and prints the transcription as + * well as word time offsets. + * + * @param gcsUri the path to the remote LINEAR16 audio file to transcribe. + */ + public static void asyncRecognizeWords(String gcsUri) throws Exception { + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + + // Configure remote file request for FLAC + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableWordTimeOffsets(true) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + + List results = response.get().getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s\n", alternative.getTranscript()); + for (WordInfo wordInfo : alternative.getWordsList()) { + System.out.println(wordInfo.getWord()); + System.out.printf( + "\t%s.%s sec - %s.%s sec\n", + wordInfo.getStartTime().getSeconds(), + wordInfo.getStartTime().getNanos() / 100000000, + wordInfo.getEndTime().getSeconds(), + wordInfo.getEndTime().getNanos() / 100000000); + } + } + } + } + // [END speech_transcribe_async_word_time_offsets_gcs] + + // [START speech_transcribe_async_gcs] + /** + * Performs non-blocking speech recognition on remote FLAC file and prints the transcription. + * + * @param gcsUri the path to the remote LINEAR16 audio file to transcribe. 
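+   * <p>Polling behavior is tuned below with {@code OperationTimedPollAlgorithm}: the total
+   * timeout is raised to 24 hours so that long audio files do not abort the wait loop early.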
+ */ + public static void asyncRecognizeGcs(String gcsUri) throws Exception { + // Configure polling algorithm + SpeechSettings.Builder speechSettings = SpeechSettings.newBuilder(); + TimedRetryAlgorithm timedRetryAlgorithm = + OperationTimedPollAlgorithm.create( + RetrySettings.newBuilder() + .setInitialRetryDelay(Duration.ofMillis(500L)) + .setRetryDelayMultiplier(1.5) + .setMaxRetryDelay(Duration.ofMillis(5000L)) + .setInitialRpcTimeout(Duration.ZERO) // ignored + .setRpcTimeoutMultiplier(1.0) // ignored + .setMaxRpcTimeout(Duration.ZERO) // ignored + .setTotalTimeout(Duration.ofHours(24L)) // set polling timeout to 24 hours + .build()); + speechSettings.longRunningRecognizeOperationSettings().setPollingAlgorithm(timedRetryAlgorithm); + + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create(speechSettings.build())) { + + // Configure remote file request for FLAC + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + + List results = response.get().getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s\n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_async_gcs] + + // [START speech_transcribe_streaming] + /** + * Performs streaming speech recognition on raw PCM audio data. + * + * @param fileName the path to a PCM audio file to transcribe. + */ + public static void streamingRecognizeFile(String fileName) throws Exception, IOException { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + + // Configure request with local raw PCM audio + RecognitionConfig recConfig = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setModel("default") + .build(); + StreamingRecognitionConfig config = + StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build(); + + class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. 
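+        // Callers block on future().get(): the future completes with all buffered messages
+        // once the server half-closes, or fails with the first streaming error.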
+ public SettableFuture> future() { + return future; + } + } + + ResponseApiStreamingObserver responseObserver = + new ResponseApiStreamingObserver<>(); + + BidiStreamingCallable callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver requestObserver = + callable.bidiStreamingCall(responseObserver); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext( + StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build()); + + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext( + StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(data)) + .build()); + + // Mark transmission as completed after sending the data. + requestObserver.onCompleted(); + + List responses = responseObserver.future().get(); + + for (StreamingRecognizeResponse response : responses) { + // For streaming recognize, the results list has one is_final result (if available) followed + // by a number of in-progress results (if iterim_results is true) for subsequent utterances. + // Just print the first result here. + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_streaming] + + // [START speech_sync_recognize_punctuation] + /** + * Performs transcription with automatic punctuation on raw PCM audio data. + * + * @param fileName the path to a PCM audio file to transcribe. + */ + public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Configure request with local raw PCM audio + RecognitionConfig recConfig = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableAutomaticPunctuation(true) + .build(); + + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio); + + // Just print the first result here. + SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0); + + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + // [END speech_sync_recognize_punctuation] + + // [START speech_transcribe_auto_punctuation] + /** + * Performs transcription on remote FLAC file and prints the transcription. + * + * @param gcsUri the path to the remote FLAC audio file to transcribe. 
+ */ + public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception { + try (SpeechClient speechClient = SpeechClient.create()) { + // Configure request with raw PCM audio + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableAutomaticPunctuation(true) + .build(); + + // Set the remote path for the audio file + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speechClient.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + + // Just print the first result here. + SpeechRecognitionResult result = response.get().getResultsList().get(0); + + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + // [END speech_transcribe_auto_punctuation] + + // [START speech_stream_recognize_punctuation] + /** + * Performs streaming speech recognition on raw PCM audio data. + * + * @param fileName the path to a PCM audio file to transcribe. + */ + public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + + // Configure request with local raw PCM audio + RecognitionConfig recConfig = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableAutomaticPunctuation(true) + .build(); + + // Build the streaming config with the audio config + StreamingRecognitionConfig config = + StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build(); + + class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. + public SettableFuture> future() { + return future; + } + } + + ResponseApiStreamingObserver responseObserver = + new ResponseApiStreamingObserver<>(); + + BidiStreamingCallable callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver requestObserver = + callable.bidiStreamingCall(responseObserver); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext( + StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build()); + + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext( + StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(data)) + .build()); + + // Mark transmission as completed after sending the data. 
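+      // Half-closing the stream signals that no more audio will be sent; only then can the
+      // service finish its work and complete the response future above.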
+ requestObserver.onCompleted(); + + List responses = responseObserver.future().get(); + + for (StreamingRecognizeResponse response : responses) { + // For streaming recognize, the results list has one is_final result (if available) followed + // by a number of in-progress results (if iterim_results is true) for subsequent utterances. + // Just print the first result here. + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + } + // [END speech_stream_recognize_punctuation] + + // [START speech_transcribe_streaming_mic] + /** Performs microphone streaming speech recognition with a duration of 1 minute. */ + public static void streamingMicRecognize() throws Exception { + + ResponseObserver responseObserver = null; + try (SpeechClient client = SpeechClient.create()) { + + responseObserver = + new ResponseObserver() { + ArrayList responses = new ArrayList<>(); + + public void onStart(StreamController controller) {} + + public void onResponse(StreamingRecognizeResponse response) { + responses.add(response); + } + + public void onComplete() { + for (StreamingRecognizeResponse response : responses) { + StreamingRecognitionResult result = response.getResultsList().get(0); + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + + public void onError(Throwable t) { + System.out.println(t); + } + }; + + ClientStream clientStream = + client.streamingRecognizeCallable().splitCall(responseObserver); + + RecognitionConfig recognitionConfig = + RecognitionConfig.newBuilder() + .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + StreamingRecognitionConfig streamingRecognitionConfig = + StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build(); + + StreamingRecognizeRequest request = + StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(streamingRecognitionConfig) + .build(); // The first request in a streaming call has to be a config + + clientStream.send(request); + // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true, + // bigEndian: false + AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false); + DataLine.Info targetInfo = + new Info( + TargetDataLine.class, + audioFormat); // Set the system information to read from the microphone audio stream + + if (!AudioSystem.isLineSupported(targetInfo)) { + System.out.println("Microphone not supported"); + System.exit(0); + } + // Target data line captures the audio stream the microphone produces. 
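+      // Note: the AudioFormat above must agree with the RecognitionConfig sent earlier
+      // (16 kHz, 16-bit signed little-endian, mono), or recognition quality will degrade.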
+ TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo); + targetDataLine.open(audioFormat); + targetDataLine.start(); + System.out.println("Start speaking"); + long startTime = System.currentTimeMillis(); + // Audio Input Stream + AudioInputStream audio = new AudioInputStream(targetDataLine); + while (true) { + long estimatedTime = System.currentTimeMillis() - startTime; + byte[] data = new byte[6400]; + audio.read(data); + if (estimatedTime > 60000) { // 60 seconds + System.out.println("Stop speaking."); + targetDataLine.stop(); + targetDataLine.close(); + break; + } + request = + StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(data)) + .build(); + clientStream.send(request); + } + } catch (Exception e) { + System.out.println(e); + } + responseObserver.onComplete(); + } + // [END speech_transcribe_streaming_mic] + + // [START speech_transcribe_enhanced_model] + /** + * Transcribe the given audio file using an enhanced model. + * + * @param fileName the path to an audio file. + */ + public static void transcribeFileWithEnhancedModel(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + // Configure request to enable enhanced models + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(8000) + .setUseEnhanced(true) + // A model must be specified to use enhanced model. + .setModel("phone_call") + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio); + + // Print out the results + for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternatives(0); + System.out.format("Transcript: %s\n\n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_enhanced_model] + + // [START speech_transcribe_model_selection] + /** + * Performs transcription of the given audio file synchronously with the selected model. + * + * @param fileName the path to a audio file to transcribe + */ + public static void transcribeModelSelection(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speech = SpeechClient.create()) { + // Configure request with video media type + RecognitionConfig recConfig = + RecognitionConfig.newBuilder() + // encoding may either be omitted or must match the value in the file header + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + // sample rate hertz may be either be omitted or must match the value in the file + // header + .setSampleRateHertz(16000) + .setModel("video") + .build(); + + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio); + // Just print the first result here. 
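+      // The "video" model selected above is one of several model IDs the API accepts;
+      // "phone_call", "command_and_search" and "default" are others (verify against the
+      // current Speech-to-Text docs before relying on this list).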
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + // [END speech_transcribe_model_selection] + + // [START speech_transcribe_model_selection_gcs] + /** + * Performs transcription of the remote audio file asynchronously with the selected model. + * + * @param gcsUri the path to the remote audio file to transcribe. + */ + public static void transcribeModelSelectionGcs(String gcsUri) throws Exception { + try (SpeechClient speech = SpeechClient.create()) { + + // Configure request with video media type + RecognitionConfig config = + RecognitionConfig.newBuilder() + // encoding may either be omitted or must match the value in the file header + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + // sample rate hertz may be either be omitted or must match the value in the file + // header + .setSampleRateHertz(16000) + .setModel("video") + .build(); + + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + + List results = response.get().getResultsList(); + + // Just print the first result here. + SpeechRecognitionResult result = results.get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + // [END speech_transcribe_model_selection_gcs] + + // [START speech_transcribe_multichannel] + /** + * Transcribe a local audio file with multi-channel recognition + * + * @param fileName the path to local audio file + */ + public static void transcribeMultiChannel(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + // Configure request to enable multiple channels + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(44100) + .setAudioChannelCount(2) + .setEnableSeparateRecognitionPerChannel(true) + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio); + + // Print out the results + for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. 
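+        // With setEnableSeparateRecognitionPerChannel(true), each result also carries a
+        // channel tag (1 or 2 here) naming the input channel it was transcribed from.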
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0); + System.out.format("Transcript : %s\n", alternative.getTranscript()); + System.out.printf("Channel Tag : %s\n", result.getChannelTag()); + } + } + } + // [END speech_transcribe_multichannel] + + // [START speech_transcribe_multichannel_gcs] + /** + * Transcribe a remote audio file with multi-channel recognition + * + * @param gcsUri the path to the audio file + */ + public static void transcribeMultiChannelGcs(String gcsUri) throws Exception { + + try (SpeechClient speechClient = SpeechClient.create()) { + + // Configure request to enable multiple channels + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(44100) + .setAudioChannelCount(2) + .setEnableSeparateRecognitionPerChannel(true) + .build(); + + // Set the remote path for the audio file + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speechClient.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + // Just print the first result here. + for (SpeechRecognitionResult result : response.get().getResultsList()) { + + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + System.out.printf("Channel Tag : %s\n", result.getChannelTag()); + } + } + } + // [END speech_transcribe_multichannel_gcs] +} diff --git a/speech/src/main/java/com/example/speech/RecognizeBeta.java b/speech/src/main/java/com/example/speech/RecognizeBeta.java new file mode 100644 index 00000000000..99544b79eff --- /dev/null +++ b/speech/src/main/java/com/example/speech/RecognizeBeta.java @@ -0,0 +1,532 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.speech; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata; +import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata; +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType; +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance; +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import com.google.cloud.speech.v1p1beta1.WordInfo; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; + +public class RecognizeBeta { + + /** Run speech recognition tasks. */ + public static void main(String... args) throws Exception { + if (args.length < 1) { + System.out.println("Usage:"); + System.out.printf( + "\tjava %s \"\" \"\"\n" + + "Commands:\n" + + "\t metadata | diarization | multi-channel |\n" + + "\t multi-language | word-level-conf\n" + + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI " + + "for a Cloud Storage resource (gs://...)\n", + RecognizeBeta.class.getCanonicalName()); + return; + } + String command = args[0]; + String path = args.length > 1 ? args[1] : ""; + + // Use command and GCS path pattern to invoke transcription. + if (command.equals("metadata")) { + transcribeFileWithMetadata(path); + } else if (command.equals("diarization")) { + if (path.startsWith("gs://")) { + transcribeDiarizationGcs(path); + } else { + transcribeDiarization(path); + } + } else if (command.equals("multi-channel")) { + if (path.startsWith("gs://")) { + transcribeMultiChannelGcs(path); + } else { + transcribeMultiChannel(path); + } + } else if (command.equals("multi-language")) { + if (path.startsWith("gs://")) { + transcribeMultiLanguageGcs(path); + } else { + transcribeMultiLanguage(path); + } + } else if (command.equals("word-level-conf")) { + if (path.startsWith("gs://")) { + transcribeWordLevelConfidenceGcs(path); + } else { + transcribeWordLevelConfidence(path); + } + } + } + + // [START speech_transcribe_recognition_metadata_beta] + /** + * Transcribe the given audio file and include recognition metadata in the request. + * + * @param fileName the path to an audio file. + */ + public static void transcribeFileWithMetadata(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + // Construct a recognition metadata object. 
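+      // The metadata acts as a hint describing how the audio was produced (interaction type,
+      // mic distance, recording device); it does not alter the transcription request itself.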
+ // Most metadata fields are specified as enums that can be found + // in speech.enums.RecognitionMetadata + RecognitionMetadata metadata = + RecognitionMetadata.newBuilder() + .setInteractionType(InteractionType.DISCUSSION) + .setMicrophoneDistance(MicrophoneDistance.NEARFIELD) + .setRecordingDeviceType(RecordingDeviceType.SMARTPHONE) + .setRecordingDeviceName("Pixel 2 XL") // Some metadata fields are free form strings + // And some are integers, for instance the 6 digit NAICS code + // https://www.naics.com/search/ + .setIndustryNaicsCodeOfAudio(519190) + .build(); + + // Configure request to enable enhanced models + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(8000) + .setMetadata(metadata) // Add the metadata to the config + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio); + + // Print out the results + for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternatives(0); + System.out.format("Transcript: %s\n\n", alternative.getTranscript()); + } + } + } + // [END speech_transcribe_recognition_metadata_beta] + + // [START speech_transcribe_diarization_beta] + /** + * Transcribe the given audio file using speaker diarization. + * + * @param fileName the path to an audio file. + */ + public static void transcribeDiarization(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + SpeakerDiarizationConfig speakerDiarizationConfig = + SpeakerDiarizationConfig.newBuilder() + .setEnableSpeakerDiarization(true) + .setMinSpeakerCount(2) + .setMaxSpeakerCount(2) + .build(); + + // Configure request to enable Speaker diarization + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(8000) + .setDiarizationConfig(speakerDiarizationConfig) + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio); + + // Speaker Tags are only included in the last result object, which has only one alternative. + SpeechRecognitionAlternative alternative = + recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0); + + // The alternative is made up of WordInfo objects that contain the speaker_tag. + WordInfo wordInfo = alternative.getWords(0); + int currentSpeakerTag = wordInfo.getSpeakerTag(); + + // For each word, get all the words associated with one speaker, once the speaker changes, + // add a new line with the new speaker and their spoken words. 
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+
+ System.out.println(speakerWords.toString());
+ }
+ }
+ // [END speech_transcribe_diarization_beta]
+
+ // [START speech_transcribe_diarization_gcs_beta]
+ /**
+ * Transcribe a remote audio file using speaker diarization.
+ *
+ * @param gcsUri the path to an audio file.
+ */
+ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ SpeakerDiarizationConfig speakerDiarizationConfig =
+ SpeakerDiarizationConfig.newBuilder()
+ .setEnableSpeakerDiarization(true)
+ .setMinSpeakerCount(2)
+ .setMaxSpeakerCount(2)
+ .build();
+
+ // Configure request to enable Speaker diarization
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setDiarizationConfig(speakerDiarizationConfig)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ // Speaker Tags are only included in the last result object, which has only one alternative.
+ LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
+ SpeechRecognitionAlternative alternative =
+ longRunningRecognizeResponse
+ .getResults(longRunningRecognizeResponse.getResultsCount() - 1)
+ .getAlternatives(0);
+
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+
+ // For each word, get all the words associated with one speaker; once the speaker changes,
+ // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+
+ System.out.println(speakerWords.toString());
+ }
+ }
+ // [END speech_transcribe_diarization_gcs_beta]
+
+ // [START speech_transcribe_multichannel_beta]
+ /**
+ * Transcribe a local audio file with multi-channel recognition
+ *
+ * @param fileName the path to the local audio file
+ */
+ public static void transcribeMultiChannel(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+
+ // Configure request to enable multiple channels
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(44100)
+ .setAudioChannelCount(2)
+ .setEnableSeparateRecognitionPerChannel(true)
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n", alternative.getTranscript());
+ System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
+ }
+ }
+ }
+ // [END speech_transcribe_multichannel_beta]
+
+ // [START speech_transcribe_multichannel_gcs_beta]
+ /**
+ * Transcribe a remote audio file with multi-channel recognition
+ *
+ * @param gcsUri the path to the audio file
+ */
+ public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // Configure request to enable multiple channels
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(44100)
+ .setAudioChannelCount(2)
+ .setEnableSeparateRecognitionPerChannel(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+ // Print out the results
+ for (SpeechRecognitionResult result : response.get().getResultsList()) {
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
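+ // With enableSeparateRecognitionPerChannel set, each result also carries a
+ // channel tag (1-based) identifying which input channel it was recognized
+ // from; it is printed alongside the transcript below.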
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
+ }
+ }
+ }
+ // [END speech_transcribe_multichannel_gcs_beta]
+
+ // [START speech_transcribe_multilanguage_beta]
+ /**
+ * Transcribe a local audio file with multi-language recognition
+ *
+ * @param fileName the path to the audio file
+ */
+ public static void transcribeMultiLanguage(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ // Get the contents of the local audio file
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+ ArrayList<String> languageList = new ArrayList<>();
+ languageList.add("es-ES");
+ languageList.add("en-US");
+
+ // Configure request to enable multiple languages
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("ja-JP")
+ .addAllAlternativeLanguageCodes(languageList)
+ .build();
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_multilanguage_beta]
+
+ // [START speech_transcribe_multilanguage_gcs_beta]
+ /**
+ * Transcribe a remote audio file with multi-language recognition
+ *
+ * @param gcsUri the path to the remote audio file
+ */
+ public static void transcribeMultiLanguageGcs(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ ArrayList<String> languageList = new ArrayList<>();
+ languageList.add("es-ES");
+ languageList.add("en-US");
+
+ // Configure request to enable multiple languages
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("ja-JP")
+ .addAllAlternativeLanguageCodes(languageList)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ for (SpeechRecognitionResult result : response.get().getResultsList()) {
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
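+ // Note (assumption based on the v1p1beta1 API surface): when alternative
+ // language codes are supplied, each result reports the language that was
+ // actually detected via result.getLanguageCode().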
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_transcribe_multilanguage_gcs_beta]
+
+ // [START speech_transcribe_word_level_confidence_beta]
+ /**
+ * Transcribe a local audio file with word level confidence
+ *
+ * @param fileName the path to the local audio file
+ */
+ public static void transcribeWordLevelConfidence(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ RecognitionAudio recognitionAudio =
+ RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+ // Configure request to enable word level confidence
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setSampleRateHertz(16000)
+ .setLanguageCode("en-US")
+ .setEnableWordConfidence(true)
+ .build();
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
+
+ // Print out the results
+ for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0);
+ System.out.format("Transcript : %s\n", alternative.getTranscript());
+ System.out.format(
+ "First Word and Confidence : %s %s \n",
+ alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence());
+ }
+ }
+ }
+ // [END speech_transcribe_word_level_confidence_beta]
+
+ // [START speech_transcribe_word_level_confidence_gcs_beta]
+ /**
+ * Transcribe a remote audio file with word level confidence
+ *
+ * @param gcsUri the path to the remote audio file
+ */
+ public static void transcribeWordLevelConfidenceGcs(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+
+ // Configure request to enable word level confidence
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setSampleRateHertz(44100)
+ .setLanguageCode("en-US")
+ .setEnableWordConfidence(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+ // Just print the first result here.
+ SpeechRecognitionResult result = response.get().getResultsList().get(0);
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
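+ // Word-level confidence is a score between 0.0 and 1.0; higher values mean
+ // the recognizer is more certain about the individual word.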
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + System.out.format( + "First Word and Confidence : %s %s \n", + alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence()); + } + } + // [END speech_transcribe_word_level_confidence_gcs_beta] +} diff --git a/speech/src/main/java/com/example/speech/SpeechAdaptation.java b/speech/src/main/java/com/example/speech/SpeechAdaptation.java new file mode 100644 index 00000000000..4c51672d134 --- /dev/null +++ b/speech/src/main/java/com/example/speech/SpeechAdaptation.java @@ -0,0 +1,73 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +// [START speech_adaptation_beta] +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognizeRequest; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechContext; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import java.io.IOException; + +public class SpeechAdaptation { + + public void speechAdaptation() throws IOException { + String uriPath = "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"; + speechAdaptation(uriPath); + } + + public static void speechAdaptation(String uriPath) throws IOException { + // Initialize client that will be used to send requests. This client only needs to be created + // once, and can be reused for multiple requests. After completing all of your requests, call + // the "close" method on the client to safely clean up any remaining background resources. + try (SpeechClient speechClient = SpeechClient.create()) { + + // Provides "hints" to the speech recognizer to favor specific words and phrases in the + // results. + // https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1p1beta1#google.cloud.speech.v1p1beta1.SpeechContext + SpeechContext speechContext = + SpeechContext.newBuilder().addPhrases("Brooklyn Bridge").setBoost(20.0F).build(); + // Configure recognition config to match your audio file. 
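+ // (The boost of 20.0 set on the SpeechContext above weights the supplied
+ // phrases relative to the rest of the vocabulary; per the speech adaptation
+ // docs, values in roughly the 0-20 range are a sensible starting point.)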
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.MP3)
+ .setSampleRateHertz(44100)
+ .setLanguageCode("en-US")
+ .addSpeechContexts(speechContext)
+ .build();
+ // Set the path to your audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(uriPath).build();
+
+ // Make the request
+ RecognizeRequest request =
+ RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build();
+
+ // Display the results
+ RecognizeResponse response = speechClient.recognize(request);
+ for (SpeechRecognitionResult result : response.getResultsList()) {
+ // First alternative is the most probable result
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript: %s\n", alternative.getTranscript());
+ }
+ }
+ }
+}
+// [END speech_adaptation_beta]
diff --git a/speech/src/main/java/com/example/speech/SpeechProfanityFilter.java b/speech/src/main/java/com/example/speech/SpeechProfanityFilter.java
new file mode 100644
index 00000000000..b8ee99215d5
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/SpeechProfanityFilter.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_with_profanity_filter_gcs]
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.SpeechRecognitionResult;
+import java.util.List;
+
+public class SpeechProfanityFilter {
+
+ public void speechProfanityFilter() throws Exception {
+ String uriPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
+ speechProfanityFilter(uriPath);
+ }
+
+ /**
+ * Transcribe a remote audio file with the profanity filter enabled
+ *
+ * @param gcsUri the path to the audio file
+ */
+ public static void speechProfanityFilter(String gcsUri) throws Exception {
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure remote file request
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setProfanityFilter(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use blocking call to get audio transcript
+ RecognizeResponse response = speech.recognize(config, audio);
+ List<SpeechRecognitionResult> results = response.getResultsList();
+
+ for (SpeechRecognitionResult result : results) {
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
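+ // With setProfanityFilter(true), any detected profanity is masked in the
+ // transcript as its first letter followed by asterisks (e.g. "f***").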
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s\n", alternative.getTranscript()); + } + } + } +} +// [END speech_transcribe_with_profanity_filter_gcs] diff --git a/speech/src/main/java/com/example/speech/TranscribeDiarization.java b/speech/src/main/java/com/example/speech/TranscribeDiarization.java new file mode 100644 index 00000000000..6778f4c5907 --- /dev/null +++ b/speech/src/main/java/com/example/speech/TranscribeDiarization.java @@ -0,0 +1,98 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +// [START speech_transcribe_diarization] + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeakerDiarizationConfig; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.WordInfo; +import com.google.protobuf.ByteString; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +class TranscribeDiarization { + + static void transcribeDiarization() throws IOException { + // TODO(developer): Replace these variables before running the sample. + String fileName = "resources/commercial_mono.wav"; + transcribeDiarization(fileName); + } + + // Transcribe the given audio file using speaker diarization. + static void transcribeDiarization(String fileName) throws IOException { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + // Initialize client that will be used to send requests. This client only needs to be created + // once, and can be reused for multiple requests. After completing all of your requests, call + // the "close" method on the client to safely clean up any remaining background resources. + try (SpeechClient client = SpeechClient.create()) { + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + SpeakerDiarizationConfig speakerDiarizationConfig = + SpeakerDiarizationConfig.newBuilder() + .setEnableSpeakerDiarization(true) + .setMinSpeakerCount(2) + .setMaxSpeakerCount(2) + .build(); + // Configure request to enable Speaker diarization + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(8000) + .setDiarizationConfig(speakerDiarizationConfig) + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio); + + // Speaker Tags are only included in the last result object, which has only one alternative. 
+ SpeechRecognitionAlternative alternative =
+ recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+ // For each word, get all the words associated with one speaker; once the speaker changes,
+ // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+ System.out.println(speakerWords.toString());
+ }
+ }
+}
+// [END speech_transcribe_diarization]
diff --git a/speech/src/main/java/com/example/speech/TranscribeDiarizationGcs.java b/speech/src/main/java/com/example/speech/TranscribeDiarizationGcs.java
new file mode 100644
index 00000000000..de7245b9a21
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/TranscribeDiarizationGcs.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_diarization_gcs]
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
+import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.WordInfo;
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+
+public class TranscribeDiarizationGcs {
+
+ static void transcribeDiarizationGcs()
+ throws IOException, ExecutionException, InterruptedException {
+ // TODO(developer): Replace these variables before running the sample.
+ String gcsUri = "gs://cloud-samples-data/speech/commercial_mono.wav";
+ transcribeDiarizationGcs(gcsUri);
+ }
+
+ // Transcribe the given GCS file using speaker diarization
+ public static void transcribeDiarizationGcs(String gcsUri)
+ throws IOException, ExecutionException, InterruptedException {
+ // Initialize client that will be used to send requests. This client only needs to be created
+ // once, and can be reused for multiple requests. After completing all of your requests, call
+ // the "close" method on the client to safely clean up any remaining background resources.
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ SpeakerDiarizationConfig speakerDiarizationConfig =
+ SpeakerDiarizationConfig.newBuilder()
+ .setEnableSpeakerDiarization(true)
+ .setMinSpeakerCount(2)
+ .setMaxSpeakerCount(2)
+ .build();
+ // Configure request to enable Speaker diarization
+ RecognitionConfig config =
+ RecognitionConfig.newBuilder()
+ .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(8000)
+ .setDiarizationConfig(speakerDiarizationConfig)
+ .build();
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
+ speechClient.longRunningRecognizeAsync(config, audio);
+ System.out.println("Waiting for response...");
+
+ // Speaker Tags are only included in the last result object, which has only one alternative.
+ LongRunningRecognizeResponse response = future.get();
+ SpeechRecognitionAlternative alternative =
+ response.getResults(response.getResultsCount() - 1).getAlternatives(0);
+ // The alternative is made up of WordInfo objects that contain the speaker_tag.
+ WordInfo wordInfo = alternative.getWords(0);
+ int currentSpeakerTag = wordInfo.getSpeakerTag();
+ // For each word, get all the words associated with one speaker; once the speaker changes,
+ // add a new line with the new speaker and their spoken words.
+ StringBuilder speakerWords =
+ new StringBuilder(
+ String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ for (int i = 1; i < alternative.getWordsCount(); i++) {
+ wordInfo = alternative.getWords(i);
+ if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
+ speakerWords.append(" ");
+ speakerWords.append(wordInfo.getWord());
+ } else {
+ speakerWords.append(
+ String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
+ currentSpeakerTag = wordInfo.getSpeakerTag();
+ }
+ }
+ System.out.println(speakerWords.toString());
+ }
+ }
+}
+// [END speech_transcribe_diarization_gcs]
diff --git a/speech/src/test/java/com/example/speech/QuickstartSampleIT.java b/speech/src/test/java/com/example/speech/QuickstartSampleIT.java
new file mode 100644
index 00000000000..ed739930161
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/QuickstartSampleIT.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for quickstart sample. 
*/ +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class QuickstartSampleIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testQuickstart() throws Exception { + // Act + QuickstartSample.main(); + + // Assert + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } +} diff --git a/speech/src/test/java/com/example/speech/RecognizeBetaIT.java b/speech/src/test/java/com/example/speech/RecognizeBetaIT.java new file mode 100644 index 00000000000..17fe91f1f12 --- /dev/null +++ b/speech/src/test/java/com/example/speech/RecognizeBetaIT.java @@ -0,0 +1,129 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for speech recognize sample. 
*/ +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class RecognizeBetaIT { + private static final String BUCKET = "cloud-samples-data"; + + private ByteArrayOutputStream bout; + private PrintStream out; + + // The path to the audio file to transcribe + private String audioFileName = "./resources/audio.raw"; + private String multiChannelAudioFileName = "./resources/commercial_stereo.wav"; + private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav"; + private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn_bridge.flac"; + private String gcsDiarizationAudioPath = "gs://" + BUCKET + "/speech/commercial_mono.wav"; + + // The path to the video file to transcribe + private String videoFileName = "./resources/Google_Gnome.wav"; + private String gcsVideoPath = "gs://" + BUCKET + "/speech/Google_Gnome.wav"; + + private String recognitionAudioFile = "./resources/commercial_mono.wav"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testMetadata() throws Exception { + RecognizeBeta.transcribeFileWithMetadata(recognitionAudioFile); + String got = bout.toString(); + assertThat(got).contains("Chrome"); + } + + @Test + public void testTranscribeDiarization() throws Exception { + RecognizeBeta.transcribeDiarization(recognitionAudioFile); + String got = bout.toString(); + // Diarization (a beta product) can be flaky, therefore this test is only looking for output + assertThat(got).contains("Speaker"); + } + + @Test + public void testTranscribeDiarizationGcs() throws Exception { + RecognizeBeta.transcribeDiarizationGcs(gcsDiarizationAudioPath); + String got = bout.toString(); + // Diarization (a beta product) can be flaky, therefore this test is only looking for output + assertThat(got).contains("Speaker"); + } + + @Test + public void testTranscribeMultiChannel() throws Exception { + RecognizeBeta.transcribeMultiChannel(multiChannelAudioFileName); + String got = bout.toString(); + assertThat(got).contains("Channel Tag : 1"); + } + + @Test + public void testTranscribeMultiChannelGcs() throws Exception { + RecognizeBeta.transcribeMultiChannelGcs(gcsMultiChannelAudioPath); + String got = bout.toString(); + assertThat(got).contains("Channel Tag : 1"); + } + + @Test + public void testTranscribeMultiLanguage() throws Exception { + RecognizeBeta.transcribeMultiLanguage(videoFileName); + String got = bout.toString(); + assertThat(got).contains("Transcript : OK Google"); + } + + @Test + public void testTranscribeMultiLanguageGcs() throws Exception { + RecognizeBeta.transcribeMultiLanguageGcs(gcsVideoPath); + String got = bout.toString(); + assertThat(got).contains("Transcript : OK Google"); + } + + @Test + public void testTranscribeWordLevelConfidence() throws Exception { + RecognizeBeta.transcribeWordLevelConfidence(audioFileName); + String got = bout.toString(); + assertThat(got).contains("Transcript : how old is the Brooklyn Bridge"); + assertThat(got).contains("First Word and Confidence : how"); + } + + @Test + public void testTranscribeWordLevelConfidenceGcs() throws Exception { + RecognizeBeta.transcribeWordLevelConfidenceGcs(gcsAudioPath); + String got = bout.toString(); + assertThat(got).contains("Transcript : how old is the Brooklyn Bridge"); + assertThat(got).contains("First Word and Confidence : how"); + } +} diff --git 
a/speech/src/test/java/com/example/speech/RecognizeIT.java b/speech/src/test/java/com/example/speech/RecognizeIT.java new file mode 100644 index 00000000000..2de1b0a1b45 --- /dev/null +++ b/speech/src/test/java/com/example/speech/RecognizeIT.java @@ -0,0 +1,170 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for speech recognize sample. */ +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class RecognizeIT { + private static final String BUCKET = "cloud-samples-tests"; + + private ByteArrayOutputStream bout; + private PrintStream out; + + // The path to the audio file to transcribe + private String audioFileName = "./resources/audio.raw"; + private String multiChannelAudioFileName = "./resources/commercial_stereo.wav"; + private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac"; + private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav"; + + private String recognitionAudioFile = "./resources/commercial_mono.wav"; + + // The path to the video file to transcribe + private String videoFileName = "./resources/Google_Gnome.wav"; + private String gcsVideoPath = "gs://" + BUCKET + "/speech/Google_Gnome.wav"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testRecognizeFile() throws Exception { + Recognize.syncRecognizeFile(audioFileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testRecognizeWordoffset() throws Exception { + Recognize.syncRecognizeWords(audioFileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + assertThat(got).contains("\t0.0 sec -"); + } + + @Test + public void testRecognizeGcs() throws Exception { + Recognize.syncRecognizeGcs(gcsAudioPath); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testAsyncRecognizeFile() throws Exception { + Recognize.asyncRecognizeFile(audioFileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testAsyncRecognizeGcs() throws Exception { + Recognize.asyncRecognizeGcs(gcsAudioPath); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testAsyncWordoffset() throws Exception { + Recognize.asyncRecognizeWords(gcsAudioPath); + String got = bout.toString(); + assertThat(got).contains("how old 
is the Brooklyn Bridge"); + assertThat(got).contains("\t0.0 sec -"); + } + + @Test + public void testStreamRecognize() throws Exception { + Recognize.streamingRecognizeFile(audioFileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } + + @Test + public void testAutoPunctuation() throws Exception { + Recognize.transcribeFileWithAutomaticPunctuation(audioFileName); + String got = bout.toString(); + assertThat(got).contains("Transcript"); + } + + @Test + public void testGcsAutoPunctuation() throws Exception { + Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath); + String got = bout.toString(); + assertThat(got).contains("Transcript"); + } + + @Test + public void testStreamAutoPunctuation() throws Exception { + Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName); + String got = bout.toString(); + assertThat(got).contains("Transcript"); + } + + @Test + public void testEnhancedModel() throws Exception { + Recognize.transcribeFileWithEnhancedModel(recognitionAudioFile); + String got = bout.toString(); + assertThat(got).contains("Chrome"); + } + + @Test + public void testModelSelection() throws Exception { + Recognize.transcribeModelSelection(videoFileName); + String got = bout.toString(); + assertThat(got).contains("OK Google"); + assertThat(got).contains("the weather outside is sunny"); + } + + @Test + public void testGcsModelSelection() throws Exception { + Recognize.transcribeModelSelectionGcs(gcsVideoPath); + String got = bout.toString(); + assertThat(got).contains("OK Google"); + assertThat(got).contains("the weather outside is sunny"); + } + + @Test + public void testTranscribeMultiChannel() throws Exception { + Recognize.transcribeMultiChannel(multiChannelAudioFileName); + String got = bout.toString(); + assertThat(got).contains("Channel Tag : 1"); + } + + @Test + public void testTranscribeMultiChannelGcs() throws Exception { + Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath); + String got = bout.toString(); + assertThat(got).contains("Channel Tag : 1"); + } +} diff --git a/speech/src/test/java/com/example/speech/SpeechAdaptationTest.java b/speech/src/test/java/com/example/speech/SpeechAdaptationTest.java new file mode 100644 index 00000000000..a31b3637d5d --- /dev/null +++ b/speech/src/test/java/com/example/speech/SpeechAdaptationTest.java @@ -0,0 +1,55 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.speech; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class SpeechAdaptationTest { + private static final String AUDIO_FILE = "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"; + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testTranscribeContextClasses() throws IOException { + SpeechAdaptation.speechAdaptation(AUDIO_FILE); + String got = bout.toString(); + assertThat(got).contains("Transcript:"); + } +} diff --git a/speech/src/test/java/com/example/speech/SpeechProfanityFilterTest.java b/speech/src/test/java/com/example/speech/SpeechProfanityFilterTest.java new file mode 100644 index 00000000000..ddf1ccaeecb --- /dev/null +++ b/speech/src/test/java/com/example/speech/SpeechProfanityFilterTest.java @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class SpeechProfanityFilterTest { + private static final String AUDIO_FILE = "gs://cloud-samples-tests/speech/brooklyn.flac"; + private ByteArrayOutputStream bout; + private PrintStream stdout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + stdout = System.out; + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(stdout); + } + + @Test + public void testSpeechProfanityFilter() throws Exception { + SpeechProfanityFilter.speechProfanityFilter(AUDIO_FILE); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } +} diff --git a/speech/src/test/java/com/example/speech/TranscribeDiarizationIT.java b/speech/src/test/java/com/example/speech/TranscribeDiarizationIT.java new file mode 100644 index 00000000000..ce69cdd2286 --- /dev/null +++ b/speech/src/test/java/com/example/speech/TranscribeDiarizationIT.java @@ -0,0 +1,80 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+import static junit.framework.TestCase.assertNotNull;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.concurrent.ExecutionException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+// Tests for speech Transcribe Diarization samples.
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class TranscribeDiarizationIT {
+ private ByteArrayOutputStream bout;
+ private PrintStream out;
+
+ // The path to the audio file to transcribe
+ private String recognitionAudioFile = "./resources/commercial_mono.wav";
+
+ private static void requireEnvVar(String varName) {
+ assertNotNull(
+ String.format("Environment variable '%s' is required to perform these tests.", varName),
+ System.getenv(varName));
+ }
+
+ @BeforeClass
+ public static void checkRequirements() {
+ requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS");
+ }
+
+ @Before
+ public void setUp() {
+ bout = new ByteArrayOutputStream();
+ out = new PrintStream(bout);
+ System.setOut(out);
+ }
+
+ @After
+ public void tearDown() {
+ System.setOut(null);
+ }
+
+ @Test
+ public void testDiarization() throws IOException {
+ TranscribeDiarization.transcribeDiarization(recognitionAudioFile);
+ String got = bout.toString();
+ assertThat(got).contains("Speaker");
+ }
+
+ @Test
+ public void testDiarizationGcs() throws IOException, ExecutionException, InterruptedException {
+ TranscribeDiarizationGcs.transcribeDiarizationGcs(
+ "gs://cloud-samples-data/speech/commercial_mono.wav");
+ String got = bout.toString();
+ assertThat(got).contains("Speaker");
+ }
+}