Merge pull request #22 from Aleph-Alpha/max-tokens-optional

Max tokens optional

moldhouse authored Aug 9, 2024
2 parents 4e1dec0 + 5172fbd commit e30a6e6

Showing 8 changed files with 79 additions and 27 deletions.
Cargo.toml (2 changes: 1 addition & 1 deletion)

@@ -1,6 +1,6 @@
 [package]
 name = "aleph-alpha-client"
-version = "0.10.1"
+version = "0.11.0"
 edition = "2021"
 description = "Interact with large language models provided by the Aleph Alpha API in Rust code"
 license = "MIT"
Changelog.md (6 changes: 6 additions & 0 deletions)

@@ -2,6 +2,12 @@
 
 ## Unreleased
 
+## 0.11.0
+
+* Add `with_maximum_tokens` method to `TaskCompletion`
+* Remove maximum tokens argument from `TaskCompletion::from_text`
+* Make maximum tokens optional
+
 ## 0.10.1
 
 * Fix: Version number in Cargo.toml
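In practice, the 0.11.0 changes swap a required positional argument for a builder call. A minimal sketch of the before/after for callers (prompt, model name, token cap, and API token are illustrative; a tokio runtime is assumed, as in the README example):

```rust
use aleph_alpha_client::{Client, How, TaskCompletion};

#[tokio::main]
async fn main() {
    let client = Client::with_authentication("AA_API_TOKEN").unwrap();

    // 0.10.x: TaskCompletion::from_text("An apple a day", 10);
    // 0.11.0: no token limit by default; generation stops at end-of-text
    // or the model's context window.
    let task = TaskCompletion::from_text("An apple a day")
        // Opt back into a cap with the new builder method.
        .with_maximum_tokens(10);

    let response = client
        .completion(&task, "luminous-base", &How::default())
        .await
        .unwrap();
    println!("{}", response.completion);
}
```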
README.md (2 changes: 1 addition & 1 deletion)

@@ -17,7 +17,7 @@ fn main() {
     let model = "luminous-base";
 
     // The task we want to perform. Here we want to continue the sentence: "An apple a day ..."
-    let task = TaskCompletion::from_text("An apple a day", 10);
+    let task = TaskCompletion::from_text("An apple a day");
 
     // Retrieve the answer from the API
     let response = client.completion(&task, model, &How::default()).await.unwrap();
src/completion.rs (37 changes: 29 additions & 8 deletions)

@@ -14,15 +14,19 @@ pub struct TaskCompletion<'a> {
 }
 
 impl<'a> TaskCompletion<'a> {
-    /// Convenience constructor leaving most settings to default, just completing a given text and
-    /// taking the maximum anticipated length of the completion.
-    pub fn from_text(text: &'a str, maximum_tokens: u32) -> Self {
+    /// Convenience constructor leaving most settings to default, just completing a given text.
+    pub fn from_text(text: &'a str) -> Self {
         TaskCompletion {
             prompt: Prompt::from_text(text),
-            stopping: Stopping::from_maximum_tokens(maximum_tokens),
+            stopping: Stopping::NO_TOKEN_LIMIT,
             sampling: Sampling::MOST_LIKELY,
         }
     }
+
+    pub fn with_maximum_tokens(mut self, maximum_tokens: u32) -> Self {
+        self.stopping.maximum_tokens = Some(maximum_tokens);
+        self
+    }
 }
 
 /// Sampling controls how the tokens ("words") are selected for the completion.
@@ -69,10 +73,13 @@ impl Default for Sampling<'_> {
 /// Controls the conditions under which the language model stops generating text.
 pub struct Stopping<'a> {
     /// The maximum number of tokens to be generated. Completion will terminate after the maximum
-    /// number of tokens is reached.Increase this value to allow for longer outputs. A text is split
+    /// number of tokens is reached. Increase this value to allow for longer outputs. A text is split
     /// into tokens. Usually there are more tokens than words. The total number of tokens of prompt
     /// and maximum_tokens depends on the model.
-    pub maximum_tokens: u32,
+    /// If maximum tokens is set to `None`, no outside limit is imposed on the number of generated
+    /// tokens. The model will generate tokens until it either emits a stop token or reaches its
+    /// technical limit, which usually is its context window.
+    pub maximum_tokens: Option<u32>,
     /// List of strings which will stop generation if they are generated. Stop sequences are
     /// helpful in structured texts. E.g.: In a question answering scenario a text may consist of
     /// lines starting with either "Question: " or "Answer: " (alternating). After producing an
@@ -83,15 +90,28 @@ pub struct Stopping<'a> {
 }
 
 impl<'a> Stopping<'a> {
+    /// Only stop once the model generates end of text, or it reaches its technical limit,
+    /// usually the context window.
+    pub const NO_TOKEN_LIMIT: Self = Stopping {
+        maximum_tokens: None,
+        stop_sequences: &[],
+    };
+
     /// Only stop once the model generates end of text, or maximum tokens are reached.
     pub fn from_maximum_tokens(maximum_tokens: u32) -> Self {
         Self {
-            maximum_tokens,
+            maximum_tokens: Some(maximum_tokens),
             stop_sequences: &[],
         }
     }
 }
 
+impl Default for Stopping<'_> {
+    fn default() -> Self {
+        Self::NO_TOKEN_LIMIT
+    }
+}
+
 /// Body sent to the Aleph Alpha API on the POST `/completion` route
 #[derive(Serialize, Debug)]
 struct BodyCompletion<'a> {
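After this hunk there are three equivalent entry points for an unbounded `Stopping`, plus the explicit cap. A minimal sketch (assuming `Stopping` is re-exported at the crate root, as the tests below use it; the cap of 64 is illustrative):

```rust
use aleph_alpha_client::Stopping;

fn main() {
    let unbounded = Stopping::NO_TOKEN_LIMIT;       // maximum_tokens: None
    let capped = Stopping::from_maximum_tokens(64); // maximum_tokens: Some(64)
    let fallback = Stopping::default();             // delegates to NO_TOKEN_LIMIT

    assert_eq!(unbounded.maximum_tokens, None);
    assert_eq!(capped.maximum_tokens, Some(64));
    assert_eq!(fallback.maximum_tokens, None);
}
```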
@@ -100,7 +120,8 @@ struct BodyCompletion<'a> {
     /// Prompt to complete. The modalities supported depend on `model`.
     pub prompt: Prompt<'a>,
     /// Limits the number of tokens which are generated for the completion.
-    pub maximum_tokens: u32,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub maximum_tokens: Option<u32>,
     /// List of strings which will stop generation if they are generated. Stop sequences are
     /// helpful in structured texts. E.g.: In a question answering scenario a text may consist of
     /// lines starting with either "Question: " or "Answer: " (alternating). After producing an
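The `skip_serializing_if` attribute is what keeps the wire format backwards compatible: with `maximum_tokens: None` the field is omitted from the request body entirely rather than sent as `null`. A minimal sketch with a hypothetical stand-in struct, since `BodyCompletion` itself is private to the crate (assumes `serde` and `serde_json` are available):

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Body {
    #[serde(skip_serializing_if = "Option::is_none")]
    maximum_tokens: Option<u32>,
}

fn main() {
    // None: the field disappears from the JSON payload altogether.
    let unbounded = serde_json::to_string(&Body { maximum_tokens: None }).unwrap();
    assert_eq!(unbounded, "{}");

    // Some: the field is serialized as a plain number.
    let capped = serde_json::to_string(&Body { maximum_tokens: Some(64) }).unwrap();
    assert_eq!(capped, r#"{"maximum_tokens":64}"#);
}
```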
src/http.rs (2 changes: 1 addition & 1 deletion)

@@ -114,7 +114,7 @@ impl HttpClient {
     ///
     /// // The task we want to perform. Here we want to continue the sentence: "An apple a day
     /// // ..."
-    /// let task = TaskCompletion::from_text("An apple a day", 10);
+    /// let task = TaskCompletion::from_text("An apple a day");
     ///
     /// // Retrieve answer from API
     /// let response = client.output_of(&task.with_model(model), &How::default()).await?;
src/lib.rs (10 changes: 5 additions & 5 deletions)

@@ -13,7 +13,7 @@
 //! let model = "luminous-base";
 //!
 //! // The task we want to perform. Here we want to continue the sentence: "An apple a day ..."
-//! let task = TaskCompletion::from_text("An apple a day", 10);
+//! let task = TaskCompletion::from_text("An apple a day");
 //!
 //! // Retrieve the answer from the API
 //! let response = client.completion(&task, model, &How::default()).await.unwrap();

@@ -65,7 +65,7 @@ impl Client {
     /// A new instance of an Aleph Alpha client helping you interact with the Aleph Alpha API.
     /// For "normal" client applications you may likely rather use [`Self::with_authentication`] or
     /// [`Self::with_base_url`].
-    /// 
+    ///
     /// You may want to only use request based authentication and skip default authentication. This
     /// is useful if writing an application which invokes the client on behalf of many different
     /// users. Having neither request, nor default authentication is considered a bug and will cause

@@ -81,7 +81,7 @@ impl Client {
     }
 
     /// Use your on-premise inference with your API token for all requests.
-    /// 
+    ///
     /// In production you typically would want to set this to <https://api.aleph-alpha.com>. Yet
     /// you may want to use a different instance for testing.
     pub fn with_base_url(host: String, api_token: impl Into<String>) -> Result<Self, Error> {

@@ -103,7 +103,7 @@ impl Client {
     ///
     /// // The task we want to perform. Here we want to continue the sentence: "An apple a day
     /// // ..."
-    /// let task = TaskCompletion::from_text("An apple a day", 10);
+    /// let task = TaskCompletion::from_text("An apple a day");
     ///
     /// // Retrieve answer from API
     /// let response = client.execute(model, &task, &How::default()).await?;

@@ -167,7 +167,7 @@ impl Client {
     ///
     /// // The task we want to perform. Here we want to continue the sentence: "An apple a day
     /// // ..."
-    /// let task = TaskCompletion::from_text("An apple a day", 10);
+    /// let task = TaskCompletion::from_text("An apple a day");
     ///
     /// // Retrieve answer from API
     /// let response = client.completion(&task, model, &How::default()).await?;
tests/integration.rs (33 changes: 28 additions & 5 deletions)

@@ -21,7 +21,7 @@ fn api_token() -> &'static str {
 #[tokio::test]
 async fn completion_with_luminous_base() {
     // When
-    let task = TaskCompletion::from_text("Hello", 1);
+    let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
 
     let model = "luminous-base";
     let client = Client::with_authentication(api_token()).unwrap();

@@ -39,7 +39,7 @@ async fn completion_with_luminous_base() {
 #[tokio::test]
 async fn request_authentication_has_priority() {
     let bad_aa_api_token = "DUMMY";
-    let task = TaskCompletion::from_text("Hello", 1);
+    let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
 
     let model = "luminous-base";
     let client = Client::with_authentication(bad_aa_api_token).unwrap();

@@ -64,7 +64,7 @@ async fn request_authentication_has_priority() {
 async fn authentication_only_per_request() {
     // Given
     let model = "luminous-base";
-    let task = TaskCompletion::from_text("Hello", 1);
+    let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
 
     // When
     let client = Client::new("https://api.aleph-alpha.com".to_owned(), None).unwrap();

@@ -88,7 +88,7 @@ async fn authentication_only_per_request() {
 async fn must_panic_if_authentication_is_missing() {
     // Given
     let model = "luminous-base";
-    let task = TaskCompletion::from_text("Hello", 1);
+    let task = TaskCompletion::from_text("Hello").with_maximum_tokens(1);
 
     // When
     let client = Client::new("https://api.aleph-alpha.com".to_owned(), None).unwrap();

@@ -172,7 +172,7 @@ async fn complete_structured_prompt() {
     let task = TaskCompletion {
         prompt: Prompt::from_text(prompt),
         stopping: Stopping {
-            maximum_tokens: 64,
+            maximum_tokens: Some(64),
             stop_sequences: &stop_sequences[..],
         },
         sampling: Sampling::MOST_LIKELY,

@@ -190,6 +190,29 @@ async fn complete_structured_prompt() {
     assert!(!response.completion.contains("User:"));
 }
 
+#[tokio::test]
+async fn context_window_stopping() {
+    // Given
+    let prompt = "Bot: Hello user!\nUser: Hello Bot, how are you doing?\nBot:";
+    let stopping = Stopping::NO_TOKEN_LIMIT;
+
+    // When
+    let task = TaskCompletion {
+        prompt: Prompt::from_text(prompt),
+        stopping,
+        sampling: Sampling::MOST_LIKELY,
+    };
+    let model = "luminous-base";
+    let client = Client::with_authentication(api_token()).unwrap();
+    let response = client
+        .output_of(&task.with_model(model), &How::default())
+        .await
+        .unwrap();
+
+    // Then
+    assert!(!response.completion.is_empty());
+}
+
 #[tokio::test]
 async fn explain_request() {
     // Given
tests/unit.rs (14 changes: 8 additions & 6 deletions)

@@ -32,7 +32,7 @@ async fn completion_with_luminous_base() {
         .await;
 
     // When
-    let task = TaskCompletion::from_text("Hello,", 1);
+    let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
     let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
     let response = client

@@ -72,7 +72,7 @@ async fn detect_rate_limiting() {
         .await;
 
     // When
-    let task = TaskCompletion::from_text("Hello,", 1);
+    let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
     let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
     let error = client

@@ -116,7 +116,7 @@ async fn detect_queue_full() {
         .await;
 
     // When
-    let task = TaskCompletion::from_text("Hello,", 1);
+    let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
     let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
     let error = client

@@ -153,7 +153,7 @@ async fn detect_service_unavailable() {
         .await;
 
     // When
-    let task = TaskCompletion::from_text("Hello,", 1);
+    let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
     let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
     let error = client

@@ -175,7 +175,7 @@ async fn be_nice() {
     let mock_server = MockServer::start().await;
 
     // When
-    let task = TaskCompletion::from_text("Hello,", 1);
+    let task = TaskCompletion::from_text("Hello,").with_maximum_tokens(1);
     let model = "luminous-base";
     let client = Client::with_base_url(mock_server.uri(), "dummy-token").unwrap();
     // Drop result, answer is meaningless anyway

@@ -211,7 +211,9 @@ async fn client_timeout() {
     // When
     let result = client
         .output_of(
-            &TaskCompletion::from_text("Hello,", 1).with_model("any"),
+            &TaskCompletion::from_text("Hello,")
+                .with_maximum_tokens(1)
+                .with_model("any"),
             &How {
                 client_timeout: response_time / 2,
                 ..Default::default()
