feat: [texttospeech] StreamingSynthesize now supports opus (#5887)

* feat: StreamingSynthesize now supports opus PiperOrigin-RevId: 707168599 Source-Link: googleapis/googleapis@d985436 Source-Link: googleapis/googleapis-gen@3fcc3af Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiM2ZjYzNhZmJmOGM5MjA4NGNjNGEzMDIzMmE3NmNhMjQ3NDg5YzNkMCJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
googleapis · Dec 17, 2024 · c335f19 · c335f19
1 parent 7e82cf2
commit c335f19
Show file tree

Hide file tree

Showing 4 changed files with 471 additions and 3 deletions.
diff --git a/packages/google-cloud-texttospeech/protos/google/cloud/texttospeech/v1beta1/cloud_tts.proto b/packages/google-cloud-texttospeech/protos/google/cloud/texttospeech/v1beta1/cloud_tts.proto
@@ -115,6 +115,11 @@ enum AudioEncoding {
   // 8-bit samples that compand 13-bit audio samples using G.711 PCMA/A-law.
   // Audio content returned as ALAW also contains a WAV header.
   ALAW = 6;
+
+  // Uncompressed 16-bit signed little-endian samples (Linear PCM).
+  // Note that as opposed to LINEAR16, audio will not be wrapped in a WAV (or
+  // any other) header.
+  PCM = 7;
 }
 
 // The top-level message sent by the client for the `ListVoices` method.
@@ -432,10 +437,25 @@ message Timepoint {
   double time_seconds = 3;
 }
 
+// Describes the output audio requested for a streaming synthesis call.
+message StreamingAudioConfig {
+  // Required. The encoding of the audio byte stream.
+  // Streaming currently accepts only PCM and OGG_OPUS; every other
+  // AudioEncoding value returns an error.
+  AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. The sample rate, in hertz, at which this audio is synthesized.
+  int32 sample_rate_hertz = 2 [(google.api.field_behavior) = OPTIONAL];
+}
+
 // Configuration for a StreamingSynthesize request: voice and output audio.
 message StreamingSynthesizeConfig {
   // Required. The voice to use for the synthesized audio.
   VoiceSelectionParams voice = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. The audio encoding and sample rate of the synthesized audio.
+  StreamingAudioConfig streaming_audio_config = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Input to be synthesized.

diff --git a/packages/google-cloud-texttospeech/protos/protos.d.ts b/packages/google-cloud-texttospeech/protos/protos.d.ts