From a8ac347e9c00dcaeb7e4b6188d66e3e1b5ceb350 Mon Sep 17 00:00:00 2001 From: Nirupa Anantha Kumar Date: Tue, 19 Feb 2019 14:37:07 -0800 Subject: [PATCH] samples: Speech multi-channel GA (#1341) --- .../java/com/example/speech/Recognize.java | 94 ++++++++++++++++++- .../java/com/example/speech/RecognizeIT.java | 17 ++++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/speech/snippets/src/main/java/com/example/speech/Recognize.java b/speech/snippets/src/main/java/com/example/speech/Recognize.java index de05ad84185..ede4d7b7c14 100644 --- a/speech/snippets/src/main/java/com/example/speech/Recognize.java +++ b/speech/snippets/src/main/java/com/example/speech/Recognize.java @@ -64,7 +64,7 @@ public static void main(String... args) throws Exception { + "Commands:\n" + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n" + "\t| wordoffsets | auto-punctuation | stream-punctuation \n" - + "\t| enhanced-model | model-selection\n" + + "\t| enhanced-model | model-selection | multi-channel\n" + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI " + "for a Cloud Storage resource (gs://...)\n", Recognize.class.getCanonicalName()); @@ -112,6 +112,12 @@ public static void main(String... 
args) throws Exception { } else { transcribeModelSelection(path); } + } else if (command.equals("multi-channel")) { + if (path.startsWith("gs://")) { + transcribeMultiChannelGcs(path); + } else { + transcribeMultiChannel(path); + } } } @@ -830,4 +836,90 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception { } } // [END speech_transcribe_model_selection_gcs] + + // [START speech_transcribe_multichannel] + /** + * Transcribe a local audio file with multi-channel recognition + * + * @param fileName the path to the local audio file + */ + public static void transcribeMultiChannel(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = + RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build(); + + // Configure request to enable multiple channels + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(44100) + .setAudioChannelCount(2) + .setEnableSeparateRecognitionPerChannel(true) + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio); + + // Print out the results + for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. 
+ SpeechRecognitionAlternative alternative = result.getAlternatives(0); + System.out.format("Transcript : %s\n", alternative.getTranscript()); + System.out.printf("Channel Tag : %s\n", result.getChannelTag()); + } + } + } + // [END speech_transcribe_multichannel] + + // [START speech_transcribe_multichannel_gcs] + /** + * Transcribe a remote audio file with multi-channel recognition + * + * @param gcsUri the path to the audio file + */ + public static void transcribeMultiChannelGcs(String gcsUri) throws Exception { + + try (SpeechClient speechClient = SpeechClient.create()) { + + // Configure request to enable multiple channels + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(44100) + .setAudioChannelCount(2) + .setEnableSeparateRecognitionPerChannel(true) + .build(); + + // Set the remote path for the audio file + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speechClient.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + // Print the transcript and channel tag of every result. + for (SpeechRecognitionResult result : response.get().getResultsList()) { + + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. 
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + System.out.printf("Channel Tag : %s\n", result.getChannelTag()); + } + } + } + // [END speech_transcribe_multichannel_gcs] } diff --git a/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java b/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java index 3bc3d5f1611..2eef6f05807 100644 --- a/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java +++ b/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java @@ -37,7 +37,10 @@ public class RecognizeIT { // The path to the audio file to transcribe private String audioFileName = "./resources/audio.raw"; + private String multiChannelAudioFileName = "./resources/commercial_stereo.wav"; private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac"; + private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav"; + private String recognitionAudioFile = "./resources/commercial_mono.wav"; // The path to the video file to transcribe @@ -150,4 +153,18 @@ public void testGcsModelSelection() throws Exception { assertThat(got).contains("OK Google"); assertThat(got).contains("the weather outside is sunny"); } + + @Test + public void testTranscribeMultiChannel() throws Exception { + Recognize.transcribeMultiChannel(multiChannelAudioFileName); + String got = bout.toString(); + assertThat(got).contains("Channel Tag : 1"); + } + + @Test + public void testTranscribeMultiChannelGcs() throws Exception { + Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath); + String got = bout.toString(); + assertThat(got).contains("Channel Tag : 1"); + } }