Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
feat: add total_billed_time response field (#787)
Browse files Browse the repository at this point in the history
Committer: @cherba
PiperOrigin-RevId: 389755489
  • Loading branch information
gcf-owl-bot[bot] authored Aug 10, 2021
1 parent 6daad6a commit 171cba0
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 18 deletions.
54 changes: 38 additions & 16 deletions protos/google/cloud/speech/v1/cloud_speech.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2019 Google LLC.
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -11,7 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

Expand All @@ -24,6 +23,7 @@ import "google/longrunning/operations.proto";
import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option cc_enable_arenas = true;
Expand Down Expand Up @@ -136,6 +136,16 @@ message StreamingRecognitionConfig {
// `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no
// more than one `StreamingRecognitionResult` with the `is_final` flag set to
// `true`.
//
// The `single_utterance` field can only be used with specific models;
// otherwise an error is returned. One of the following must hold for the
// `model` field in `RecognitionConfig`:
//
// * It is set to `command_and_search`.
// * It is set to `phone_call` and the additional field `useEnhanced` is set
// to `true`.
// * It is left undefined. In this case the API auto-selects a model based on
// any other parameters that you set in `RecognitionConfig`.
bool single_utterance = 2;

// If `true`, interim results (tentative hypotheses) may be
Expand All @@ -158,7 +168,7 @@ message RecognitionConfig {
// a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
// recognition can be reduced if lossy codecs are used to capture or transmit
// audio, particularly if background noise is present. Lossy codecs include
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
//
// The `FLAC` and `WAV` audio file formats include a header that describes the
// included audio content. You can request recognition for `WAV` files that
Expand Down Expand Up @@ -274,7 +284,7 @@ message RecognitionConfig {
// A means to provide context to assist the speech recognition. For more
// information, see
// [speech
// adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
// adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
repeated SpeechContext speech_contexts = 6;

// If `true`, the top result includes a list of words and
Expand All @@ -287,9 +297,6 @@ message RecognitionConfig {
// This feature is only available in select languages. Setting this for
// requests in other languages has no effect at all.
// The default 'false' value does not add punctuation to result hypotheses.
// Note: This is currently offered as an experimental service, complimentary
// to all users. In the future this may be exclusively available as a
// premium feature.
bool enable_automatic_punctuation = 11;

// Config to enable speaker diarization and set additional
Expand Down Expand Up @@ -325,7 +332,7 @@ message RecognitionConfig {
// </tr>
// <tr>
// <td><code>video</code></td>
// <td>Best for audio that originated from from video or includes multiple
// <td>Best for audio that originated from video or includes multiple
// speakers. Ideally the audio is recorded at a 16 kHz or greater
// sampling rate. This is a premium model that costs more than the
// standard rate.</td>
Expand Down Expand Up @@ -367,9 +374,11 @@ message SpeakerDiarizationConfig {
// number of speakers. If not set, the default value is 6.
int32 max_speaker_count = 3;

// Unused.
int32 speaker_tag = 5
[(google.api.field_behavior) = OUTPUT_ONLY, deprecated = true];
// Output only. Unused.
int32 speaker_tag = 5 [
deprecated = true,
(google.api.field_behavior) = OUTPUT_ONLY
];
}

// Description of audio data to be recognized.
Expand Down Expand Up @@ -548,6 +557,9 @@ message RecognizeResponse {
// Sequential list of transcription results corresponding to
// sequential portions of audio.
repeated SpeechRecognitionResult results = 2;

// When available, billed audio seconds for the corresponding request.
google.protobuf.Duration total_billed_time = 3;
}

// The only message returned to the client by the `LongRunningRecognize` method.
Expand All @@ -559,6 +571,9 @@ message LongRunningRecognizeResponse {
// Sequential list of transcription results corresponding to
// sequential portions of audio.
repeated SpeechRecognitionResult results = 2;

// When available, billed audio seconds for the corresponding request.
google.protobuf.Duration total_billed_time = 3;
}

// Describes the progress of a long-running `LongRunningRecognize` call. It is
Expand All @@ -574,6 +589,10 @@ message LongRunningRecognizeMetadata {

// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// Output only. The URI of the audio file being transcribed. Empty if the
// audio was sent as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
Expand All @@ -582,8 +601,8 @@ message LongRunningRecognizeMetadata {
// audio, and `single_utterance` is set to false, then no messages are streamed
// back to the client.
//
// Here's an example of a series of ten `StreamingRecognizeResponse`s that might
// be returned while processing audio:
// Here's an example of a series of `StreamingRecognizeResponse`s that might be
// returned while processing audio:
//
// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
//
Expand Down Expand Up @@ -653,6 +672,10 @@ message StreamingRecognizeResponse {

// Indicates the type of speech event.
SpeechEventType speech_event_type = 4;

// When available, billed audio seconds for the stream.
// Set only if this is the last response in the stream.
google.protobuf.Duration total_billed_time = 5;
}

// A streaming speech recognition result corresponding to a portion of the audio
Expand Down Expand Up @@ -749,11 +772,10 @@ message WordInfo {
// The word corresponding to this set of information.
string word = 3;

// A distinct integer value is assigned for every speaker within
// Output only. A distinct integer value is assigned for every speaker within
// the audio. This field specifies which one of those speakers was detected to
// have spoken this word. Value ranges from '1' to diarization_speaker_count.
// speaker_tag is set if enable_speaker_diarization = 'true' and only in the
// top alternative.
int32 speaker_tag = 5
[(google.api.field_behavior) = OUTPUT_ONLY];
int32 speaker_tag = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}
24 changes: 24 additions & 0 deletions protos/protos.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 171cba0

Please sign in to comment.