Skip to content

Commit

Permalink
Fix the sleep logic for streaming as per sample rate (#279)
Browse files Browse the repository at this point in the history
* sleep 100ms instead of sleeping as function of rate
* added sleep as inverse of sampling rate
* added LINEAR16 bytes per sample constant
* added few variables
* added truth and log4j
* added 32khz audio
* added Streaming Test
* added unit tests for 16 and 32 khz audio
* checkstyle fixes
* renamed to KHz
* added comment
  • Loading branch information
puneith authored and lesv committed Aug 4, 2016
1 parent ebdad96 commit 16407a9
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 8 deletions.
11 changes: 11 additions & 0 deletions speech/grpc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,17 @@ limitations under the License.
<version>1.1.33.Fork14</version>
<classifier>${tcnative.classifier}</classifier>
</dependency>
<dependency>
<groupId>com.google.truth</groupId>
<artifactId>truth</artifactId>
<version>0.28</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
</dependencies>
<!-- // [END dependency] -->

Expand Down
Binary file added speech/grpc/resources/audio32KHz.raw
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package com.examples.cloud.speech;

import static org.apache.log4j.ConsoleAppender.SYSTEM_OUT;

import com.google.cloud.speech.v1beta1.RecognitionConfig;
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1beta1.SpeechGrpc;
Expand All @@ -26,7 +28,6 @@
import com.google.protobuf.TextFormat;

import io.grpc.ManagedChannel;
import io.grpc.Status;
import io.grpc.stub.StreamObserver;

import org.apache.commons.cli.CommandLine;
Expand All @@ -35,6 +36,10 @@
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.SimpleLayout;

import java.io.File;
import java.io.FileInputStream;
Expand All @@ -43,8 +48,7 @@
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
* Client that sends streaming audio to Speech.Recognize and returns streaming transcript.
Expand All @@ -60,6 +64,9 @@ public class StreamingRecognizeClient {

private final SpeechGrpc.SpeechStub speechClient;

private static final int BYTES_PER_BUFFER = 3200; //buffer size in bytes
private static final int BYTES_PER_SAMPLE = 2; //bytes per sample for LINEAR16

private static final List<String> OAUTH2_SCOPES =
Arrays.asList("https://www.googleapis.com/auth/cloud-platform");

Expand All @@ -73,6 +80,13 @@ public StreamingRecognizeClient(ManagedChannel channel, String file, int samplin
this.channel = channel;

speechClient = SpeechGrpc.newStub(channel);

//Send log4j logs to Console
//If you are going to run this on GCE, you might wish to integrate with gcloud-java logging.
//See https://github.com/GoogleCloudPlatform/gcloud-java/blob/master/README.md#stackdriver-logging-alpha

ConsoleAppender appender = new ConsoleAppender(new SimpleLayout(), SYSTEM_OUT);
logger.addAppender(appender);
}

public void shutdown() throws InterruptedException {
Expand All @@ -91,8 +105,7 @@ public void onNext(StreamingRecognizeResponse response) {

@Override
public void onError(Throwable error) {
Status status = Status.fromThrowable(error);
logger.log(Level.WARNING, "recognize failed: {0}", status);
logger.log(Level.WARN, "recognize failed: {0}", error);
finishLatch.countDown();
}

Expand Down Expand Up @@ -127,9 +140,12 @@ public void onCompleted() {
// Open audio file. Read and send sequential buffers of audio as additional RecognizeRequests.
FileInputStream in = new FileInputStream(new File(file));
// For LINEAR16 at 16000 Hz sample rate, 3200 bytes corresponds to 100 milliseconds of audio.
byte[] buffer = new byte[3200];
byte[] buffer = new byte[BYTES_PER_BUFFER];
int bytesRead;
int totalBytes = 0;
int samplesPerBuffer = BYTES_PER_BUFFER / BYTES_PER_SAMPLE;
int samplesPerMillis = samplingRate / 1000;

while ((bytesRead = in.read(buffer)) != -1) {
totalBytes += bytesRead;
StreamingRecognizeRequest request =
Expand All @@ -138,8 +154,7 @@ public void onCompleted() {
.build();
requestObserver.onNext(request);
// To simulate real-time audio, sleep after sending each audio buffer.
// For 16000 Hz sample rate, sleep 100 milliseconds.
Thread.sleep(samplingRate / 160);
Thread.sleep(samplesPerBuffer / samplesPerMillis);
}
logger.info("Sent " + totalBytes + " bytes from audio file: " + file);
} catch (RuntimeException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright 2016 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.examples.cloud.speech;

import static com.google.common.truth.Truth.assertThat;

import io.grpc.ManagedChannel;
import org.apache.log4j.Logger;
import org.apache.log4j.SimpleLayout;
import org.apache.log4j.WriterAppender;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URI;
import java.nio.file.Path;
import java.nio.file.Paths;


/**
* Unit tests for {@link StreamingRecognizeClient }.
*/
@RunWith(JUnit4.class)
public class StreamingRecognizeClientTest {
private Writer writer;
private WriterAppender appender;

@Before
public void setUp() {
writer = new StringWriter();
appender = new WriterAppender(new SimpleLayout(), writer);
Logger.getRootLogger().addAppender(appender);
}

@After
public void tearDown() {
Logger.getRootLogger().removeAppender(appender);
}

@Test
public void test16KHzAudio() throws InterruptedException, IOException {
URI uri = new File("resources/audio.raw").toURI();
Path path = Paths.get(uri);

String host = "speech.googleapis.com";
int port = 443;
ManagedChannel channel = AsyncRecognizeClient.createChannel(host, port);
StreamingRecognizeClient client = new StreamingRecognizeClient(channel, path.toString(), 16000);

client.recognize();
assertThat(writer.toString()).contains("transcript: \"how old is the Brooklyn Bridge\"");
}

@Test
public void test32KHzAudio() throws InterruptedException, IOException {
URI uri = new File("resources/audio32KHz.raw").toURI();
Path path = Paths.get(uri);

String host = "speech.googleapis.com";
int port = 443;
ManagedChannel channel = AsyncRecognizeClient.createChannel(host, port);
StreamingRecognizeClient client = new StreamingRecognizeClient(channel, path.toString(), 32000);

client.recognize();
assertThat(writer.toString()).contains("transcript: \"how old is the Brooklyn Bridge\"");
}
}

0 comments on commit 16407a9

Please sign in to comment.