Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech multi-channel GA #1341

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static void main(String... args) throws Exception {
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+ "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
+ "\t| enhanced-model | model-selection\n"
+ "\t| enhanced-model | model-selection | multi-channel\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand Down Expand Up @@ -112,6 +112,12 @@ public static void main(String... args) throws Exception {
} else {
transcribeModelSelection(path);
}
} else if (command.equals("multi-channel")) {
if (path.startsWith("gs://")) {
transcribeMultiChannelGcs(path);
} else {
transcribeMultiChannel(path);
}
}
}

Expand Down Expand Up @@ -830,4 +836,90 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
}
}
// [END speech_transcribe_model_selection_gcs]

// [START speech_transcribe_multichannel]
/**
* Transcribe a local audio file with multi-channel recognition
*
* @param fileName the path to local audio file
*/
public static void transcribeMultiChannel(String fileName) throws Exception {
Path path = Paths.get(fileName);
byte[] content = Files.readAllBytes(path);

try (SpeechClient speechClient = SpeechClient.create()) {
// Get the contents of the local audio file
RecognitionAudio recognitionAudio =
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

// Configure request to enable multiple channels
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(44100)
.setAudioChannelCount(2)
.setEnableSeparateRecognitionPerChannel(true)
.build();

// Perform the transcription request
RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

// Print out the results
for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
System.out.format("Transcript : %s\n", alternative.getTranscript());
System.out.printf("Channel Tag : %s\n", result.getChannelTag());
}
}
}
// [END speech_transcribe_multichannel]

// [START speech_transcribe_multichannel_gcs]
/**
* Transcribe a remote audio file with multi-channel recognition
*
* @param gcsUri the path to the audio file
*/
public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {

try (SpeechClient speechClient = SpeechClient.create()) {

// Configure request to enable multiple channels
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(44100)
.setAudioChannelCount(2)
.setEnableSeparateRecognitionPerChannel(true)
.build();

// Set the remote path for the audio file
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

// Use non-blocking call for getting file transcription
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
speechClient.longRunningRecognizeAsync(config, audio);

while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(10000);
}
// Just print the first result here.
for (SpeechRecognitionResult result : response.get().getResultsList()) {

// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

// Print out the result
System.out.printf("Transcript : %s\n", alternative.getTranscript());
System.out.printf("Channel Tag : %s\n", result.getChannelTag());
}
}
}
// [END speech_transcribe_multichannel_gcs]
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ public class RecognizeIT {

// The path to the audio file to transcribe
private String audioFileName = "./resources/audio.raw";
private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";

private String recognitionAudioFile = "./resources/commercial_mono.wav";

// The path to the video file to transcribe
Expand Down Expand Up @@ -150,4 +153,18 @@ public void testGcsModelSelection() throws Exception {
assertThat(got).contains("OK Google");
assertThat(got).contains("the weather outside is sunny");
}

@Test
public void testTranscribeMultiChannel() throws Exception {
Recognize.transcribeMultiChannel(multiChannelAudioFileName);
String got = bout.toString();
assertThat(got).contains("Channel Tag : 1");
}

@Test
public void testTranscribeMultiChannelGcs() throws Exception {
Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
String got = bout.toString();
assertThat(got).contains("Channel Tag : 1");
}
}