feat(api): add service tier argument for chat completions (openai#900)

deyaaeldeen · Aug 10, 2024 · b62842b
1 parent e1f81ed
commit b62842b
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 1 deletion.
diff --git a/.stats.yml b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml
diff --git a/src/resources/chat/completions.ts b/src/resources/chat/completions.ts
@@ -66,6 +66,12 @@ export interface ChatCompletion {
    */
   object: 'chat.completion';
 
+  /**
+   * The service tier used for processing the request. This field is only included if
+   * the `service_tier` parameter is specified in the request.
+   */
+  service_tier?: 'scale' | 'default' | null;
+
   /**
    * This fingerprint represents the backend configuration that the model runs with.
    *
@@ -205,6 +211,12 @@ export interface ChatCompletionChunk {
    */
   object: 'chat.completion.chunk';
 
+  /**
+   * The service tier used for processing the request. This field is only included if
+   * the `service_tier` parameter is specified in the request.
+   */
+  service_tier?: 'scale' | 'default' | null;
+
   /**
    * This fingerprint represents the backend configuration that the model runs with.
    * Can be used in conjunction with the `seed` request parameter to understand when
@@ -800,6 +812,19 @@ export interface ChatCompletionCreateParamsBase {
    */
   seed?: number | null;
 
+  /**
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', the system will utilize scale tier credits until they are
+   *   exhausted.
+   * - If set to 'default', the request will be processed in the shared cluster.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
+   */
+  service_tier?: 'auto' | 'default' | null;
+
   /**
    * Up to 4 sequences where the API will stop generating further tokens.
    */

diff --git a/tests/api-resources/chat/completions.test.ts b/tests/api-resources/chat/completions.test.ts
@@ -38,6 +38,7 @@ describe('resource completions', () => {
       presence_penalty: -2,
       response_format: { type: 'json_object' },
       seed: -9223372036854776000,
+      service_tier: 'auto',
       stop: 'string',
       stream: false,
       stream_options: { include_usage: true },