Skip to content

Commit

Permalink
samples: Speech multi-channel GA (#1341)
Browse files Browse the repository at this point in the history
  • Loading branch information
nirupa-kumar authored and chingor13 committed Aug 15, 2020
1 parent 6dd4c26 commit a8ac347
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 1 deletion.
94 changes: 93 additions & 1 deletion speech/snippets/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static void main(String... args) throws Exception {
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+ "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
+ "\t| enhanced-model | model-selection\n"
+ "\t| enhanced-model | model-selection | multi-channel\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand Down Expand Up @@ -112,6 +112,12 @@ public static void main(String... args) throws Exception {
} else {
transcribeModelSelection(path);
}
} else if (command.equals("multi-channel")) {
if (path.startsWith("gs://")) {
transcribeMultiChannelGcs(path);
} else {
transcribeMultiChannel(path);
}
}
}

Expand Down Expand Up @@ -830,4 +836,90 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
}
}
// [END speech_transcribe_model_selection_gcs]

// [START speech_transcribe_multichannel]
/**
 * Transcribes a local audio file with each audio channel recognized separately.
 *
 * <p>The file's bytes are sent in a single {@code recognize} request configured for LINEAR16
 * audio at 44100 Hz with two channels. For every result, the first alternative's transcript and
 * the result's channel tag are printed to standard output.
 *
 * @param fileName the path to the local audio file
 * @throws Exception if reading the file or the recognition call throws
 */
public static void transcribeMultiChannel(String fileName) throws Exception {
  // Read the whole file before a client is created.
  byte[] audioBytes = Files.readAllBytes(Paths.get(fileName));

  try (SpeechClient client = SpeechClient.create()) {
    // Request settings: two channels, each one recognized on its own.
    RecognitionConfig multiChannelConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Wrap the raw bytes as the audio payload of the request.
    ByteString payload = ByteString.copyFrom(audioBytes);
    RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(payload).build();

    // Issue the request and wait for the complete response.
    RecognizeResponse reply = client.recognize(multiChannelConfig, audio);

    for (SpeechRecognitionResult channelResult : reply.getResultsList()) {
      // A result may carry several alternatives; only the first one is reported.
      SpeechRecognitionAlternative best = channelResult.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", best.getTranscript());
      System.out.printf("Channel Tag : %s\n", channelResult.getChannelTag());
    }
  }
}
// [END speech_transcribe_multichannel]

// [START speech_transcribe_multichannel_gcs]
/**
 * Transcribes a remote audio file with each audio channel recognized separately.
 *
 * <p>A long-running recognition operation is started for the given URI, configured for LINEAR16
 * audio at 44100 Hz with two channels. The method polls the operation every 10 seconds until it
 * is done, then prints the first alternative's transcript and the channel tag of every result.
 *
 * @param gcsUri the URI of the audio file (the caller in this file passes {@code gs://} paths)
 * @throws Exception if the recognition operation throws or the wait is interrupted
 */
public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
  try (SpeechClient client = SpeechClient.create()) {
    // Point the request at the remote file instead of uploading content.
    RecognitionAudio remoteAudio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Request settings: two channels, each one recognized on its own.
    RecognitionConfig multiChannelConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Start the operation without blocking on its result.
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> operation =
        client.longRunningRecognizeAsync(multiChannelConfig, remoteAudio);

    // Poll for completion, reporting progress between checks.
    while (!operation.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    // Every result is printed, not just the first one.
    LongRunningRecognizeResponse finished = operation.get();
    for (SpeechRecognitionResult channelResult : finished.getResultsList()) {
      // A result may carry several alternatives; only the first one is reported.
      SpeechRecognitionAlternative best = channelResult.getAlternatives(0);
      System.out.printf("Transcript : %s\n", best.getTranscript());
      System.out.printf("Channel Tag : %s\n", channelResult.getChannelTag());
    }
  }
}
// [END speech_transcribe_multichannel_gcs]
}
17 changes: 17 additions & 0 deletions speech/snippets/src/test/java/com/example/speech/RecognizeIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ public class RecognizeIT {

// The path to the audio file to transcribe
private String audioFileName = "./resources/audio.raw";
private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";

private String recognitionAudioFile = "./resources/commercial_mono.wav";

// The path to the video file to transcribe
Expand Down Expand Up @@ -150,4 +153,18 @@ public void testGcsModelSelection() throws Exception {
assertThat(got).contains("OK Google");
assertThat(got).contains("the weather outside is sunny");
}

@Test
public void testTranscribeMultiChannel() throws Exception {
  // Run the local-file sample, then check the output collected in bout
  // (presumably the captured standard output; it is set up outside this view).
  Recognize.transcribeMultiChannel(multiChannelAudioFileName);
  assertThat(bout.toString()).contains("Channel Tag : 1");
}

@Test
public void testTranscribeMultiChannelGcs() throws Exception {
  // Run the remote-file sample, then check the output collected in bout
  // (presumably the captured standard output; it is set up outside this view).
  Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
  assertThat(bout.toString()).contains("Channel Tag : 1");
}
}

0 comments on commit a8ac347

Please sign in to comment.