This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit ae91bf8

Add User input + max tokens requested exceeds model context window error response (#1325)

* added User input + max tokens requested exceeds model context window error response.

Signed-off-by: Ye, Xinyu <xinyu.ye@intel.com>
XinyuYe-Intel authored Feb 29, 2024
1 parent a7c15a9 commit ae91bf8
Showing 2 changed files with 18 additions and 0 deletions.
2 changes: 2 additions & 0 deletions intel_extension_for_transformers/neural_chat/errorcode.py
@@ -37,6 +37,7 @@ class ErrorCodes:
     ERROR_MODEL_NOT_SUPPORTED = 2006
     ERROR_HF_TOKEN_NOT_PROVIDED = 2007
     WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH = 2101
+    WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH = 2102

     # General Service Error Code - Dataset related
     ERROR_DATASET_NOT_FOUND = 3001
@@ -83,6 +84,7 @@ class ErrorCodes:
         ERROR_INVALID_MODEL_VERSION: "Invalid model version",
         ERROR_MODEL_NOT_SUPPORTED: "Model is not supported",
         WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH: "Input sequence exceeds maximum length",
+        WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH: "Input and completion sequence exceeds maximum length",

         ERROR_DATASET_NOT_FOUND: "Dataset was not found",
         ERROR_DATASET_CONFIG_NOT_FOUND: "Dataset configuration not found",
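The new constant follows the file's existing numbering scheme: 2xxx codes appear to be model-related, with the 21xx range holding input-length warnings (3xxx is dataset-related). A minimal sketch of how a caller might turn the new code into its message; note that `error_message` is a placeholder name for the code-to-string dict edited in the second hunk above, whose real attribute name is not visible in this diff:

```python
# Minimal sketch; `error_message` is an assumed name for the
# code -> string mapping shown in the second hunk of errorcode.py.
class ErrorCodes:
    WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH = 2101             # input alone too long
    WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH = 2102  # input + completion too long

    error_message = {
        WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH:
            "Input sequence exceeds maximum length",
        WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH:
            "Input and completion sequence exceeds maximum length",
    }

print(ErrorCodes.error_message[ErrorCodes.WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH])
# -> Input and completion sequence exceeds maximum length
```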
16 changes: 16 additions & 0 deletions intel_extension_for_transformers/neural_chat/models/model_utils.py
@@ -1082,6 +1082,14 @@ def predict_stream(**params):
         )
         set_latest_error(ErrorCodes.WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH)
         return
+    elif length < max_new_tokens:
+        logging.error(f"This model's maximum context length is {context_len} tokens. \
+            However, you requested {input_token_len+max_new_tokens} tokens ({input_token_len} \
+            in the messages, {max_new_tokens} in the completion). Please reduce the length \
+            of the messages or completion.",
+        )
+        set_latest_error(ErrorCodes.WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH)
+        return

     generate_kwargs = get_generate_kwargs(
         max_new_tokens, input_token_len,
@@ -1383,6 +1391,14 @@ def predict(**params):
         )
         set_latest_error(ErrorCodes.WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH)
         return
+    elif length < max_new_tokens:
+        logging.error(f"This model's maximum context length is {context_len} tokens. \
+            However, you requested {input_token_len+max_new_tokens} tokens ({input_token_len} \
+            in the messages, {max_new_tokens} in the completion). Please reduce the length \
+            of the messages or completion.",
+        )
+        set_latest_error(ErrorCodes.WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH)
+        return

     if device in ["cpu", "cuda", "xpu"]:
         if device in ["cuda", "xpu"]:
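The two hunks are identical: `predict_stream` and `predict` now share the same two-stage length check. A self-contained sketch of that logic, under the assumption that `length` holds the room left in the context window after the prompt; the first branch's condition sits outside this diff, so the `length <= 0` form below is inferred, not quoted:

```python
# Self-contained sketch of the two-stage check the commit adds.
# Names mirror model_utils.py; the first branch's condition is an
# assumption, since only its body appears in the diff context.

def check_context_window(input_token_len: int, max_new_tokens: int, context_len: int):
    """Return None if the request fits, else the warning code (2101/2102)."""
    length = context_len - input_token_len  # room left for the completion
    if length <= 0:
        # Input alone already exceeds the model's context window.
        return 2101  # WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH
    elif length < max_new_tokens:
        # Input fits, but input + requested completion does not.
        return 2102  # WARNING_INPUT_COMPLETION_EXCEED_MAX_SEQ_LENGTH
    return None

# Worked example: a 4096-token context, 4000 prompt tokens, 512 requested
# tokens. 4000 + 512 = 4512 > 4096, so the new 2102 warning fires.
assert check_context_window(4000, 512, 4096) == 2102
assert check_context_window(100, 512, 4096) is None
```

One side note on the committed code: backslash continuations inside an f-string keep each continuation line's leading indentation as part of the string, so the logged message contains runs of embedded spaces; implicit concatenation of adjacent parenthesized string literals would avoid that.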
