From fec74f4c87e1609ef1ff1304342030dc0b12d99d Mon Sep 17 00:00:00 2001 From: Avnish Narayan Date: Mon, 4 Dec 2023 14:17:59 -0800 Subject: [PATCH 1/3] LLMPerfV2 Signed-off-by: Avnish Narayan --- .gitignore | 400 +++++++----- NOTICE.txt | 14 - README.md | 437 +++++++++++-- analyze-raw.ipynb | 588 ------------------ analyze-token-benchmark-results.ipynb | 327 ++++++++++ env_sample.txt | 19 - llm_correctness.py | 309 +++++++++ llmperf.py | 484 -------------- optional.txt | 2 - pre-commit.sh | 5 + pyproject.toml | 23 + requirements-dev.txt | 2 + requirements.txt | 18 - sonnet.txt | 518 --------------- src/llmperf/__init__.py | 1 + src/llmperf/common.py | 38 ++ src/llmperf/common_metrics.py | 17 + src/llmperf/models.py | 21 + src/llmperf/ray_clients/__init__.py | 0 src/llmperf/ray_clients/litellm_client.py | 100 +++ .../openai_chat_completions_client.py | 120 ++++ src/llmperf/ray_clients/sagemaker_client.py | 158 +++++ src/llmperf/ray_clients/vertexai_client.py | 135 ++++ src/llmperf/ray_llm_client.py | 22 + src/llmperf/requests_launcher.py | 48 ++ src/llmperf/sonnet.txt | 84 +++ src/llmperf/utils.py | 147 +++++ token_benchmark_ray.py | 464 ++++++++++++++ 28 files changed, 2647 insertions(+), 1854 deletions(-) delete mode 100644 NOTICE.txt delete mode 100644 analyze-raw.ipynb create mode 100644 analyze-token-benchmark-results.ipynb delete mode 100644 env_sample.txt create mode 100644 llm_correctness.py delete mode 100644 llmperf.py delete mode 100644 optional.txt create mode 100755 pre-commit.sh create mode 100644 pyproject.toml create mode 100644 requirements-dev.txt delete mode 100644 requirements.txt delete mode 100644 sonnet.txt create mode 100644 src/llmperf/__init__.py create mode 100644 src/llmperf/common.py create mode 100644 src/llmperf/common_metrics.py create mode 100644 src/llmperf/models.py create mode 100644 src/llmperf/ray_clients/__init__.py create mode 100644 src/llmperf/ray_clients/litellm_client.py create mode 100644 src/llmperf/ray_clients/openai_chat_completions_client.py create mode 100644 src/llmperf/ray_clients/sagemaker_client.py create mode 100644 src/llmperf/ray_clients/vertexai_client.py create mode 100644 src/llmperf/ray_llm_client.py create mode 100644 src/llmperf/requests_launcher.py create mode 100644 src/llmperf/sonnet.txt create mode 100644 src/llmperf/utils.py create mode 100644 token_benchmark_ray.py diff --git a/.gitignore b/.gitignore index 17584b8..54047ad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,161 +1,247 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions +# The build output should clearly not be checked in +*test-output.xml +/bazel-* +/python/ray/core +/python/ray/pickle5_files/ +/python/ray/thirdparty_files/ +/python/ray/pyarrow_files/ +/python/ray/jars/ +/python/ray/cpp/ +/python/build +/python/dist +/python/python-driver-* +/python/ray/serve/generated +/thirdparty/pkg/ +/build/java +.jar +/dashboard/client/build + +# Files generated by flatc should be ignored +/src/ray/gcs/format/*_generated.h +/src/ray/object_manager/format/*_generated.h +/src/ray/raylet/format/*_generated.h +/java/runtime/src/main/java/io/ray/runtime/generated/* +/java/serve/src/main/java/io/ray/serve/generated/* + +# Files genrated by c++ worker should be ignored. 
+/cpp/example/thirdparty/ +/cpp/example/bazel-* +/python/ray/cpp + +# Redis temporary files +*dump.rdb + +# Python byte code files +*.pyc +python/.eggs +*.egg-info + +# Backup files +*.bak + +# Emacs temporary files +*~ +*# + +# Compiled Object files +*.slo +*.lo +*.o +*.xo +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries *.so +*.dylib +*.dll +python/ray/_raylet.pyd + +# Incremental linking files +*.ilk + +# Library export files +*.exp + +# Debug symbols +*.pdb + +# Fortran module files +*.mod +!deploy/ray-operator/go.mod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Visual Studio files +/packages +*.suo +*.user +*.VC.db +*.VC.opendb + +# Protobuf-generated files +*_pb2.py +*.pb.h +*.pb.cc + +# Ray cluster configuration +scripts/nodes.txt + +# OS X folder attributes +.DS_Store + +# Debug files +*.dSYM/ +*.su + +# Python setup files +*.egg-info + +# Compressed files +*.gz + +# Datasets from examples +**/MNIST_data/ +**/cifar-10-batches-bin/ + +# Generated documentation files +/doc/_build +/doc/source/_static/thumbs +/doc/source/tune/generated_guides/ +/doc/source/**/doc/ + +# User-specific stuff: +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/dictionaries +.llvm-local.bazelrc + +# Sensitive or high-churn files: +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.xml +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml + +# Gradle: +.idea/**/gradle.xml +.idea/**/libraries +.idea + +# Website +/site/Gemfile.lock +/site/.sass-cache +/site/_site + +# Pytest Cache +**/.pytest_cache +**/.cache +.benchmarks +python-driver-* + +# Vscode +.vscode/ + +*.iml + +# Java +java/**/target +java/**/lib +java/**/.settings +java/**/.classpath +java/**/.project +java/runtime/native_dependencies/ +java/testng_custom.xml + +dependency-reduced-pom.xml + +# Cpp +cpp/example/thirdparty/ + +.clwb + +# pom.xml files generated from pom_template.xml +java/**/pom.xml + +# python virtual env +venv + +# pyenv version file +.python-version + +# Vim +.*.swp +*.swp +.*.swo +*.swo +tags +tags.lock +tags.temp +*.vim + +# Emacs +.#* + +# tools +tools/prometheus* + +# ray project files +project-id +.mypy_cache/ + +# release test related +.anyscale.yaml +test_state.json + +# workflow storage +workflow_data/ + +# vscode java extention generated +.factorypath + +# Jupyter Notebooks +**/.ipynb_checkpoints/ + +### Added by Hedron's Bazel Compile Commands Extractor: https://github.com/hedronvision/bazel-compile-commands-extractor +# The external link: Differs on Windows vs macOS/Linux, so we can't check it in. The pattern needs to not have a trailing / because it's a symlink on macOS/Linux. +/external +# Compiled output -> don't check in +/compile_commands.json +# Directory where clangd puts its indexing work +/.cache/ -# Distribution / packaging -.Python +# Auto-generated tag mapping +tag-mapping.json + +.bazeliskrc + +# ignore tmp files +*.tmp +out +temp* + +# build output build/ -develop-eggs/ dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ -.env -results/ + +# results +output/ +*.json +result_outputs/ + +__pycache__ +**/__pycache__/ \ No newline at end of file diff --git a/NOTICE.txt b/NOTICE.txt deleted file mode 100644 index 4820e73..0000000 --- a/NOTICE.txt +++ /dev/null @@ -1,14 +0,0 @@ -[Project Name] -Copyright 2023-onwards Anyscale, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and
-limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
index 966c4e4..d40604e 100644
--- a/README.md
+++ b/README.md
@@ -1,78 +1,407 @@
-# llmperf
+# LLMPerf
-LLMPerf is a tool for benchmarking and validating the performance of LLMs.
+A tool for evaluating the performance of LLM APIs.
-Benchmarking: LLMPerf measures time to first token (TTFT),
-inter-token latency (ITL) and requests that take longer than 3 seconds
-to start returning data.
+# Installation
+```bash
+git clone ...
+cd LLMPerf
+pip install -e .
+```
-Validation: we send a simple query to the LLM and ensure the returned data
-is valid. In particular it checks for inter-request cross-over
-(request A gets the responses for request B).
+# Basic Usage
-Variation in input and output token lengths is a design parameter
-since this is intended to be representative. This is because
-there are some optimizations (e.g. continuous batching) that
-we know work better with varying input and output length.
+We implement two tests for evaluating LLMs: a load test that measures performance and a correctness test that checks the validity of responses.
-## Supported endpoints
+## Load test
-Currently supported endpoints include:
+The load test spawns a number of concurrent requests to the LLM API and measures the inter-token latency and generation throughput per request and across concurrent requests. The prompt that is sent with each request is of the format:
-- Any OpenAI compatible endpoints, including Anyscale Endpoints,
-Anyscale Private Endpoints, OpenAI, Fireworks, Perplexity etc
-- Any [Huggingface Text Generation Inference](https://github.com/huggingface/text-generation-inference) endpoints
-- Together
-- Vertex AI
-- SageMaker
+```
+Randomly stream lines from the following text. Don't generate eos tokens:
+LINE 1,
+LINE 2,
+LINE 3,
+...
+```
-Please see `requirements.txt` for more details on dependency requirements.
+where the lines are randomly sampled from a collection of lines from Shakespeare sonnets. Tokens are counted using the `LlamaTokenizer` regardless of which LLM API is being tested, to ensure that the prompts are consistent across different LLM APIs. (A minimal sketch of this token counting follows the OpenAI example below.)
-## Upcoming refactor
+To run the most basic load test, you can run the token_benchmark_ray.py script.
-This is prototype code. We are currently refactoring the code to be more
-extensible (including a pluggable endpoints, varying traffic load etc).
+### OpenAI Compatible APIs
+```bash
+export OPENAI_API_KEY=secret_abcdefg
+export OPENAI_API_BASE="https://api.endpoints.anyscale.com/v1"
-In addition we plan to:
+python token_benchmark_ray.py \
+--model "meta-llama/Llama-2-7b-chat-hf" \
+--mean-input-tokens 550 \
+--stddev-input-tokens 150 \
+--mean-output-tokens 150 \
+--stddev-output-tokens 10 \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+--llm-api openai \
+--additional-sampling-params '{}'
-- Make running the benchmark not only possible from
-command line, but also possible to integrate easily into CI/CD or job scheduling
-systems.
-- Control where the generated files and information go.
-- Automate report generation.
+```
-We expect this refactor to be complete some time in November 2023.
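+For reference, the shared token counting described above can be reproduced in a few lines. This is a minimal sketch, using the same `hf-internal-testing/llama-tokenizer` checkpoint that appears in the Advanced Usage example below, and not necessarily the benchmark's exact code path:
+
+```python
+from transformers import LlamaTokenizerFast
+
+# One tokenizer for every API under test, so token counts stay comparable.
+tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
+
+prompt = "Randomly stream lines from the following text. Don't generate eos tokens:\n"
+num_input_tokens = len(tokenizer.encode(prompt))
+print(num_input_tokens)
+```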
+### Anthropic
+```bash
+export ANTHROPIC_API_KEY=secret_abcdefg
-## A note on rate limits
+python token_benchmark_ray.py \
+--model "claude-2" \
+--mean-input-tokens 550 \
+--stddev-input-tokens 150 \
+--mean-output-tokens 150 \
+--stddev-output-tokens 10 \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+--llm-api anthropic \
+--additional-sampling-params '{}'
-Many LLM providers have extremely low rate limits by default (e.g. Perplexity 3 requests per 90 seconds).
+```
-You can use the sleep parameter to overcome these difficulties, but it does affect the representativeness of the results.
+### TogetherAI
-Other systems do not have rate limits, but we consider that if the TTFT exceeds 3 second for more than
-5% of queries that the system is overloaded.
+```bash
+export TOGETHERAI_API_KEY="YOUR_TOGETHER_KEY"
+python token_benchmark_ray.py \
+--model "together_ai/togethercomputer/CodeLlama-7b-Instruct" \
+--mean-input-tokens 550 \
+--stddev-input-tokens 150 \
+--mean-output-tokens 150 \
+--stddev-output-tokens 10 \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+--llm-api "litellm" \
+--additional-sampling-params '{}'
-## Default values
+```
-Default values are the ones that we use for testing Anyscale Endpoints.
-The distribution of inputs and outputs roughly mirrors the input and output
-patterns we see there.
+### Hugging Face API
-We recommend setting the seed (or using the provided seed) to reduce variance but
-still have randomization.
+```bash
+export HUGGINGFACE_API_KEY="YOUR_HUGGINGFACE_API_KEY"
+export HUGGINGFACE_API_BASE="YOUR_HUGGINGFACE_API_ENDPOINT"
-Do a python llmperf.py --help to see all options.
+python token_benchmark_ray.py \
+--model "huggingface/meta-llama/Llama-2-7b-chat-hf" \
+--mean-input-tokens 550 \
+--stddev-input-tokens 150 \
+--mean-output-tokens 150 \
+--stddev-output-tokens 10 \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+--llm-api "litellm" \
+--additional-sampling-params '{}'
-## Usage
-1. Provide API base and key in .env file. Check out env_sample.txt
-2. Test out Anyscale Endpoint with following command by sending 20 requests
-`python llmperf.py -r 20 -m "meta-llama/Llama-2-70b-chat-hf"`
-3. Control input token numbers by setting min/max lines, and control output token number by setting req-lines and max_tokens
-`python llmperf.py -r 20 -f openai -m "gpt-3.5-turbo" --min-lines 8 --max-lines 10`
-`python llmperf.py -r 20 -f openai -m "gpt-3.5-turbo" --req-lines 3 --max-tokens 128`
-4. Control sleep between rounds to avoid hitting rate limit
-`python llmperf.py -r 20 -f fireworks -m "accounts/fireworks/models/llama-v2-70b-chat" --sleep 10`
-5. Output will be saved at **framework-timestamp.json** and **framework-timestamp_raw.json**
-6. Use Jupyter with analyze-raw.ipynb to visualize and/or interact with the raw data.
+```
+### LiteLLM
+
+LLMPerf can use LiteLLM to send prompts to LLM APIs. For the environment variables to set for each provider, and the values to pass for model and additional-sampling-params,
+
+see the [LiteLLM Provider Documentation](https://docs.litellm.ai/docs/providers).
+
+```bash
+python token_benchmark_ray.py \
+--model "meta-llama/Llama-2-7b-chat-hf" \
+--mean-input-tokens 550 \
+--stddev-input-tokens 150 \
+--mean-output-tokens 150 \
+--stddev-output-tokens 10 \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+--llm-api "litellm" \
+--additional-sampling-params '{}'
+
+```
+
+### Vertex AI
+
+Here, `--model` is used for logging, not for selecting the model. The model is specified in the Vertex AI Endpoint ID.
+
+The GCLOUD_ACCESS_TOKEN needs to be refreshed regularly, as the token generated by `gcloud auth print-access-token` expires after roughly 15 minutes.
+
+Vertex AI doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
+
+```bash
+
+gcloud auth application-default login
+gcloud config set project YOUR_PROJECT_ID
+
+export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token)
+export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID
+export GCLOUD_REGION=YOUR_REGION
+export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID
+
+python token_benchmark_ray.py \
+--model "meta-llama/Llama-2-7b-chat-hf" \
+--mean-input-tokens 550 \
+--stddev-input-tokens 150 \
+--mean-output-tokens 150 \
+--stddev-output-tokens 10 \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+--llm-api "vertexai" \
+--additional-sampling-params '{}'
+
+```
+
+### SageMaker
+
+SageMaker doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
+
+```bash
+
+export AWS_ACCESS_KEY_ID="YOUR_ACCESS_KEY_ID"
+export AWS_SECRET_ACCESS_KEY="YOUR_SECRET_ACCESS_KEY"
+export AWS_SESSION_TOKEN="YOUR_SESSION_TOKEN"
+export AWS_REGION_NAME="YOUR_ENDPOINTS_REGION_NAME"
+
+python token_benchmark_ray.py \
+--model "llama-2-7b" \
+--llm-api "sagemaker" \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+
+```
+
+See `python token_benchmark_ray.py --help` for more details on the arguments.
+
+## Correctness Test
+
+The correctness test spawns a number of concurrent requests to the LLM API with the following format:
+
+```
+Convert the following sequence of words into a number: {random_number_in_word_format}. Output just your final answer.
+```
+
+where random_number_in_word_format could be, for example, "one hundred and twenty three". The test then checks that the response contains that number in digit format, which in this case would be 123.
+
+The test does this for a number of randomly generated numbers and reports the number of responses that contain a mismatch (a sketch of this check appears below).
+
+To run the most basic correctness test, you can run the llm_correctness.py script.
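+As a rough illustration, the mismatch check boils down to looking for the expected number, in digit form, in the response text. A minimal sketch, not necessarily the repo's exact implementation:
+
+```python
+import re
+
+def response_matches(expected_number: int, response_text: str) -> bool:
+    # A response passes if the expected number appears in digit form,
+    # tolerating thousands separators such as "1,234".
+    digits = re.findall(r"\d+", response_text.replace(",", ""))
+    return any(int(d) == expected_number for d in digits)
+
+assert response_matches(123, "The answer is 123.")
+assert not response_matches(123, "The answer is 132.")
+```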
+
+### OpenAI Compatible APIs
+
+```bash
+export OPENAI_API_KEY=secret_abcdefg
+export OPENAI_API_BASE=https://console.endpoints.anyscale.com/m/v1
+
+python llm_correctness.py \
+--model "meta-llama/Llama-2-7b-chat-hf" \
+--max-num-completed-requests 150 \
+--timeout 600 \
+--num-concurrent-requests 10 \
+--results-dir "result_outputs"
+```
+
+### Anthropic
+
+```bash
+export ANTHROPIC_API_KEY=secret_abcdefg
+
+python llm_correctness.py \
+--model "claude-2" \
+--llm-api "anthropic" \
+--max-num-completed-requests 5 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs"
+```
+
+### TogetherAI
+
+```bash
+export TOGETHERAI_API_KEY="YOUR_TOGETHER_KEY"
+
+python llm_correctness.py \
+--model "together_ai/togethercomputer/CodeLlama-7b-Instruct" \
+--llm-api "litellm" \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+
+```
+
+### Hugging Face API
+
+```bash
+export HUGGINGFACE_API_KEY="YOUR_HUGGINGFACE_API_KEY"
+export HUGGINGFACE_API_BASE="YOUR_HUGGINGFACE_API_ENDPOINT"
+
+python llm_correctness.py \
+--model "huggingface/meta-llama/Llama-2-7b-chat-hf" \
+--llm-api "litellm" \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+
+```
+
+### LiteLLM
+
+LLMPerf can use LiteLLM to send prompts to LLM APIs. For the environment variables to set for each provider, and the values to pass for model and additional-sampling-params,
+
+see the [LiteLLM Provider Documentation](https://docs.litellm.ai/docs/providers).
+
+```bash
+python llm_correctness.py \
+--model "meta-llama/Llama-2-7b-chat-hf" \
+--llm-api "litellm" \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+
+```
+
+See `python llm_correctness.py --help` for more details on the arguments.
+
+
+### Vertex AI
+
+Here, `--model` is used for logging, not for selecting the model. The model is specified in the Vertex AI Endpoint ID.
+
+The GCLOUD_ACCESS_TOKEN needs to be refreshed regularly, as the token generated by `gcloud auth print-access-token` expires after roughly 15 minutes.
+
+Vertex AI doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
+
+
+```bash
+
+gcloud auth application-default login
+gcloud config set project YOUR_PROJECT_ID
+
+export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token)
+export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID
+export GCLOUD_REGION=YOUR_REGION
+export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID
+
+python llm_correctness.py \
+--model "meta-llama/Llama-2-7b-chat-hf" \
+--llm-api "vertexai" \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+
+```
+
+### SageMaker
+
+SageMaker doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
+
+```bash
+
+export AWS_ACCESS_KEY_ID="YOUR_ACCESS_KEY_ID"
+export AWS_SECRET_ACCESS_KEY="YOUR_SECRET_ACCESS_KEY"
+export AWS_SESSION_TOKEN="YOUR_SESSION_TOKEN"
+export AWS_REGION_NAME="YOUR_ENDPOINTS_REGION_NAME"
+
+python llm_correctness.py \
+--model "llama-2-7b" \
+--llm-api "sagemaker" \
+--max-num-completed-requests 2 \
+--timeout 600 \
+--num-concurrent-requests 1 \
+--results-dir "result_outputs" \
+
+```
+
+## Saving Results
+
+The results of the load test and correctness test are saved in the results directory specified by the `--results-dir` argument. The results are saved in two files: one with the summary metrics of the test, and one with the metrics from each individual request that is returned.
+
+# Advanced Usage
+
+The correctness tests were implemented with the following workflow in mind:
+
+```python
+import ray
+from transformers import LlamaTokenizerFast
+
+from llmperf.ray_clients.openai_chat_completions_client import (
+    OpenAIChatCompletionsClient,
+)
+from llmperf.models import RequestConfig
+from llmperf.requests_launcher import RequestsLauncher
+
+
+# Copying the environment variables and passing them to ray.init() is necessary
+# for making any clients work.
+ray.init(runtime_env={"env_vars": {"OPENAI_API_BASE" : "https://api.endpoints.anyscale.com/v1",
+                                   "OPENAI_API_KEY" : "YOUR_API_KEY"}})
+
+base_prompt = "hello_world"
+tokenizer = LlamaTokenizerFast.from_pretrained(
+    "hf-internal-testing/llama-tokenizer"
+)
+base_prompt_len = len(tokenizer.encode(base_prompt))
+# A prompt is passed around as a (text, token_length) pair.
+prompt = (base_prompt, base_prompt_len)
+
+# Create a client for spawning requests.
+clients = [OpenAIChatCompletionsClient.remote()]
+
+req_launcher = RequestsLauncher(clients)
+
+req_config = RequestConfig(
+    model="meta-llama/Llama-2-7b-chat-hf",
+    prompt=prompt
+)
+
+req_launcher.launch_requests(req_config)
+result = req_launcher.get_next_ready(block=True)
+print(result)
+
+```
+
+# Implementing New LLM Clients
+
+To implement a new LLM client, you need to implement the base class `llmperf.ray_llm_client.LLMClient` and decorate it as a Ray actor.
+
+```python
+
+from typing import Any, Dict, Tuple
+
+import ray
+
+from llmperf.models import RequestConfig
+from llmperf.ray_llm_client import LLMClient
+
+# Shorthand for the per-request metrics mapping a client returns.
+Metrics = Dict[str, Any]
+
+
+@ray.remote
+class CustomLLMClient(LLMClient):
+
+    def llm_request(self, request_config: RequestConfig) -> Tuple[Metrics, str, RequestConfig]:
+        """Make a single completion request to an LLM API.
+
+        Returns:
+            Metrics about the performance characteristics of the request.
+            The text generated by the request to the LLM API.
+            The request_config used to make the request. This is mainly for logging purposes.
+
+        """
+        ...
+
+```
+
+# Legacy Codebase
+The old LLMPerf code base can be found in the [llmperf-legacy](https://github.com/ray-project/llmval-legacy) repo.
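+As a starting point for analysis, the files written under `--results-dir` (see Saving Results above) are plain JSON and can be inspected directly. A minimal sketch, assuming a `result_outputs` directory produced by one of the runs above (the exact file names depend on the run):
+
+```python
+import glob
+import json
+
+import pandas as pd
+
+for path in sorted(glob.glob("result_outputs/*.json")):
+    with open(path) as f:
+        data = json.load(f)
+    print(path)
+    if isinstance(data, list):
+        # Per-request metrics: one record per completed request.
+        print(pd.DataFrame(data).describe())
+    else:
+        # Summary metrics for the whole test.
+        print(json.dumps(data, indent=2))
+```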
diff --git a/analyze-raw.ipynb b/analyze-raw.ipynb deleted file mode 100644 index a272fd7..0000000 --- a/analyze-raw.ipynb +++ /dev/null @@ -1,588 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 39, - "id": "dacfe98a-e81b-4089-9506-97a652993b5b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import time\n", - "import datetime\n" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "17f7abe9-ed9e-466c-b034-577489aaf98b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df = pd.read_json('anyscale-1697499992_raw.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "56da958f-694d-4e3e-a559-a275ae22d5d4", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Validity results:\n", - "Exception 419\n", - "OK 181\n", - "Name: valid, dtype: int64\n", - "Clean DF is: 181\n", - "Mean TTFT: 2209 ms (mean tokens in: 512, out: 138)\n", - "TTFT > 3 s: 24.86%\n", - "ITL (out): 77.51 ms/token, mean tokens/s output (out): 12.93 token/s\n" - ] - } - ], - "source": [ - "# This is the standard analysis we would do in the real script\n", - "\n", - "print('Validity results:')\n", - "print(df['valid'].value_counts())\n", - "cdf = df[df.valid !='Exception'].copy()\n", - "print(f'Clean DF is: {len(cdf)}')\n", - "cdf['inter_tokens_delay'] = cdf.total_time/cdf.tokens_out\n", - "cdf['total_tokens_per_s'] = (cdf.tokens_out + cdf.tokens_in)/cdf.total_time\n", - "cdf['out_tokens_per_s'] = cdf.tokens_out/cdf.total_time\n", - "mean_tokens_in = cdf['tokens_in'].mean() \n", - "mean_tokens_out = cdf['tokens_out'].mean() \n", - "mean_ttft = cdf['ttft'].mean()\n", - "gt_3_ttft = len(cdf[cdf['ttft'] > 3])/len(cdf)\n", - "print(f'Mean TTFT: {mean_ttft*1000:.0f} ms (mean tokens in: {mean_tokens_in:.0f}, out: {mean_tokens_out:.0f})')\n", - "print(f'TTFT > 3 s: {gt_3_ttft*100:.2f}%')\n", - "print(f'ITL (out): {cdf.inter_tokens_delay.mean()*1000:.2f} ms/token, mean tokens/s output (out): {cdf.out_tokens_per_s.mean():.2f} token/s')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "99936985-a21f-4738-9021-f9db8d67769a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "181" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(cdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "4f5db74b-63ab-4268-b1a7-10b14641efb1", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAABDa0lEQVR4nO3deXxV9Z3/8fclhGAkCUsSEjSQUCiKCkQWjXRYLG61rbQdx8k4PxDRx0NF0UFnLGOrbWfG2KrTWmpxHFFkVGy1dd9KWStSNolCFySSBEQCYUlCEg0hOb8/2tzmJnc5995z71nu6/l43McDzj335Lud7/dztu/xGYZhCAAAwCP62J0AAAAAKxHcAAAATyG4AQAAnkJwAwAAPIXgBgAAeArBDQAA8BSCGwAA4CkENwAAwFP62p2AZOvs7NSnn36qrKws+Xw+u5MDAABMMAxDJ06c0LBhw9SnT/hzMykX3Hz66acqKiqyOxkAACAG+/fv15lnnhl2nZQLbrKysiT9pXCys7NtTg0AADCjqalJRUVF/nE8nJQLbrouRWVnZxPcAADgMmZuKeGGYgAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ5CcAMAADyF4AYAAHgKwQ0AAPAUghsAAOApBDcAAMBTUu71C4Bb7a1vVu2xVhUPOV0luae78m9Yuf1klIcbUA5AbwQ3gMM1tJ7UwpWV2rCn3r9s2ug8LSkvVU5muiv+hpXbT0Z5uIGV5UCAhJ7c3iZ8hmEYdicimZqampSTk6PGxkZPvDjT7Q0Qkc1ZtkUbq46oo9uumubzaeqoXK2YP0VS/O3AzN9IdB66RMpLotPqFlaUA4GitWLdD53Ujzu5TUQzfnPmxmW6doLBmf308G8+cmQD9AKndDZ765sD6rhLh2Fow556fbC/Iap2ECxfkf5G9ZGWuMrA7PbNdKrJSKsT6j2SeMqhex7/7cUPtL32eMD3G6uO6LaVOxwbKDqtjtbvPqzfVx/Vu3uOaOeBJv/yySMG6Ym5k8P2x04MJBaurNTGqiMBy3q2ia46SPNJHYYcUxfdEdy4RLCdoCend0pu4LTO5o+fNoX9/p6Xd+pPn54IWBasHYTLV+2x1rB/o+ZofAGD2e2b6VQTlVan1XsksZTDB/uP656XdmlXhDZlVaBoNafVUe3RFs1+dKOOt7YH/X5r7XHNeGit1t01M2T6zLT5ZIrlYKqL0/YXnpZyiWA7QU/dOyXEJlxnY4fl79WE/X7XgaaAyxJS8HYQLl8jBmeG/RvFQ+Ib4Mxsv6tTjZSXRKXVafUeSTTl0NB6UnOWbdFVj74XMbDpruaos/oRp9VRuMCmy/HWdt2wYmvQ78y2+WSKFDTf8/LOkOOQ0/YXghsXCLUThOK0TsktnNbZ7K1v1rYelwy6G50/IOzvu9pBpHz5fD5NG52nNJ8v4Pu0vy6P9+h9ZN6AiNs3cybC7Lai5bR6NyOacli4slLvVoU+4xtKvEGtlZxWR+t3H44Y2HTZWnM8aPrMtvlkihQ0BzuY6uK0/YXgxgUi7QQ9OalTchOndTaR0vOPU4rCft/VDszka0l5qaaOyg1YPnVUrpaUl5pIaWSRth/NmQir0+q0ejfLTDl0BQWdUTw20scnS4JaKzmtjio/aYhq/WDpS/QZ01iEC5rPHWbuARyn7C/cc+MCkXaCLl1PSjipU3ITp3U2kdJz8VlDtX73kZBPzHS1AzP5yslM14r5U1R9pEU1R1ssv0Ew0va7OtVIeTGzrWg5rd7NMlMO0R4YSdLEEYMsC2qt4rQ6mnDmwKjWD5a+aNp8Mi0pL9VtK3cE3FczdVSu7rz0i7rq0Y0Rf++U/YUzNy4QKpruycoj7VSUiEseiU6PmaP3aPJVknu6Zo7JT1hew20/2jMyVqXVafUerXDlYPbASPrLYDB5xCC9cNNFjrkptIvT6mj6mHwNMlFGaRHOgiX6jGksuoLmtXfN0FPzJmvtXTO0Yv4UjS8aGHYcctr+wjw3LtHY2t4rmp42Ok93XfpFHW096chH8dwoVDnb9RSA2fREOovhtHyFk6izR+G4qXyiFWw+nGCcnl+n1dH+o636+qPvBtx7k9ZH6uj82zpm02dHm49FsDrokoy6iGb8JrhxGbfsBG7ntHK2Kj1Oy5fTeLF8gg1I556Rrfu/cZ6y+qe7Lr9Oq6Pf7anX+/uO6/zhg/R3o/Mcl75E6Mpj3z4+neo0kpZXgpsw3B7cAEAsUmHQhbcxQ7EHOG0WTq+hfJFqSnJp60gdBDcOE+ssnAzW5jhtltPuqEMAsAaXpRwm2pfhOXmwdiInvnSROgSAyKIZv3kU3EFimYXTaVOSO5nTZjntQh0CgLUIbhwk2lk4nTpYO5XTZjmVqEMASASCGweJdhZOJw7WTua0WU4l6hAAEoHgxkGinYXTiYO1kzltllOJOgSARCC4cZhopuN24mDtdE6b7pw6BADr8bSUQ5mdcMtpU5K7hZMmNAtVh3de+kUdc8GrNXiEPTaUm3sFq7tUrc9k5ts1MxQvXbpUS5cuVU1NjSTpnHPO0b333qsrrrgi5G9eeOEFffe731VNTY1Gjx6tH/7wh/rKV75i+m+6JbiJlhMG61TdueMV7FHwQZnpAe+scWLAyiPssfF6uSWqH7Czf+n624Mz0/Xwb/YE1N3ZBVnKSE9T5f4G/7Ke9dk97YZhuLKf7Fn+drRj1wQ3r732mtLS0jR69GgZhqGnn35aDz74oHbs2KFzzjmn1/rvvfeepk2bpoqKCn31q1/Vc889px/+8Id6//33de6555r6m14NbuxEZx0fMy82tHsunmCcOGeQG3i13BLVDwTb7uQRg/TE3MkJn9g02N82o6s+f1o+Iezv3dBPhir/9L59tHnvsaS2Y9cEN8EMHjxYDz74oObPn9/ru2uuuUYtLS16/fXX/csuvPBCTZgwQY899pip7RPcWI/OOnZ765t18cPrTa+/9q4Zjjjai5Rup6TTabxcbonqB+Ys26J399Srs8fyQZnpWnfXzF77opX7rdk3qocyuXiQ3q9tCPl7N/SToco/nES1Y1dO4tfR0aHnn39eLS0tKisrC7rOpk2bNGvWrIBll112mTZt2hRyu21tbWpqagr4wDpenqclGZPrRXoUvCenPBrOI+yx8Wq5Jaof6NpusIH1eGu7blixtddyq/bbUHmKxtaa42F/7/R+Mlz5h+OEdmx7cLNz504NGDBAGRkZuummm/TSSy9p7NixQdetq6vT0KFDA5YNHTpUdXV1IbdfUVGhnJwc/6eoqMjS9Kc6Ouv4RHoUvCenPBrOI+yx8Wq5JaofiLTdrTXHA/ZFK/fbaA884uHUfjLWMnBCO7Y9uBkzZowqKyu1efNm3XzzzZo7d67++Mc/Wrb9xYsXq7Gx0f/Zv3+/ZdsGnXW8Qj0K3pPTHg3nEfbYeLXcEtUPmAn+u++LVu630R54dJfm82nSiEGm13dqPxltGTipHdse3PTr10+jRo3SxIkTVVFRofHjx+uRRx4Jum5BQYEOHToUsOzQoUMqKCgIuf2MjAxlZ2
cHfGAdOuv4BZt7Z1CPewPsnIsnFKfNGeQWXiy3RPUDI/MGRAwSuu+LVu63Zg88gpk6KlfL5k6O+Hun95Nmyr87J7Vjx91QfPHFF2v48OFavnx5r++uueYatba26rXXXvMvu+iiizRu3DhuKLaRV+faSfaN0j0f53fC4/1muCWdTuO1cktUP9DY2q4ZD60NmBpBktJ80tRReb32RSv321B5umn6SN3/1p+068Df7uGcXDxIcy8q1jnDcvz1Gez33bmhn4xU/t+/6pyktWPXPC21ePFiXXHFFRo+fLhOnDjhf7T7nXfe0SWXXKI5c+bojDPOUEVFhaS/PAo+ffp0PfDAA7ryyiv1/PPP6/777+dRcIdIVmdt9hHPeB8F9WrQBiRSIvqBxr/ePLy15rh/Wah9MRH7bag8mc1r9/UkuS6ojab8E8k1wc38+fO1evVqHTx4UDk5ORo3bpzuvvtuXXLJJZKkGTNmqLi4OOAszgsvvKDvfOc7/kn8fvSjHzGJX4ow+4in1Y9we+0IG3CraPbFWPZbr01EavXkgXb3ha4JbuxAcONeZk83e3XeHQCJ4bWJSCNNPujWvLlynhsgHLOPeHp53h0AiZGMOa2SKVh+unNz3swiuEFYe+ubtXb34YhBgdn1YmX2EU8z6yU6rYlkRdrdnH8El8g69Xp78doBkZnJB92at2j0tTsBXuKl67XJvL/FTLmZfcQz0no/X1OlrbX23hQXi4bWk7pxxba4bugLVVdWvH3cS23fTRJ5OcVrl2pCMXNA5KY2Hc3Ee27LWzS458YCXuwEknF/S7TlFm+ask/rq6bPTkWdVrsH7obWk5r50DrTj8KGYuY9OVYFTG5u+25i9f1l3dv6fa/8wfH3rnWlN83nU4dhxLSPeu19X9G8r85teYtm/ObMjQXCXa91SicQja7Tmj11P5VZknu66fVCibbclpSX9nrEM9ikUcHWO3/EwICzHmbS6pSB+4ant/UKbCSpw5CpcpZC12lP0bZbr7X9ZIsncI53/+vO7NuvY9l2IoRLb7T7aNdkfaECOTcN/lLo/HTn1rxFg+AmTlZ2ME5h9jRtPKdzYym3nMx0rZg/JeLjiMHWqznaonlP9X7JXri0OmHg3lvfrG21vYOy7sycWjZ7qjqaduvFtp8sVgTOVl5OiXQDajzbNiuaQC9cemPZR80eONnNbBkFy093Tsyb1Qhu4uS167WSdfe3hJvqPJ5yK8k1d5Tbfb1IV197ptUpA7eZoMTMlPLRviPGioDJjW0/WawInK161YDZs3qxbNuMaAO9SOmNZR81e+Bkl2jLKFh+JPdNHhgPnpaKkxdfHGn2PTHxvE8m2eUWbVqd8rbzSOU0uXiQqY4q2vfkWBEwubHtJ4NVT+dY9T6naG5ATcS7kKJ9DNtsemPZR0tyT9fMMfmOG/xjfVS9e36cmrdEIbiJk1dfHGn25X6xvgTQjnKLJq1OGbi7yinYjjooM11PzJlselvB8t9TNOXv1bafaFYGzla8hDOas3pWX86IJdAzm16vBNdee1Q9WbgsZQG3XK+NRjz3t5gd1JJdbtGk1Uk3GQYrp8kjBumJuZOjurG5Z/6HnN5PD73zUVzl78W2n2hWBs5WXE6J1NYT+WLEWC5tRrph1ms3y3L5NzY8Cm4hp16vdTqnlpvTXpyZqHKyYrtOrUOnctorQuxq67E+hh3ubdtem4rAa4+qx4N3S4XBu6UQLQZuWM1pgXMXO9p6PIFeV3r79vHpVGds89y4gdOCYbsQ3IRBcAPAKQicnRvoOQll9BcEN2EQ3ACA8xDoRZbqZcQMxQAAVzE7f1Uqo4zM41FwAADgKQQ3AADAUwhuAACApxDcAAAATyG4AQAAnkJwAwAAPIXgBgAAeArBDQAA8BQm8fOwvfXNqj3WmrKzWQJS+P3AzD7CfgS4D8GNBzW0ntTClZUB7yE5d1i27v/GeRpXNNC+hMEz3DDgf7D/uO55aZd2fdrkX9b1Ph5DRq99pOe7eoLtR6n4Ph8klxv2LTfg3VI2SlQjDvYG2S50zvbwSoflhgE/WBq7dL1JWVLEtyzzJmYkU7B2O7l4kOZeVKxzhuUkvN9wQx/FizPDcEJwk8gBYm99sy5+eH3I7/v4pC+NyjPdObuhwVslEXl1QzAQDacN+N3rzDAM1R5r1c/XVGn7vuPqjLFnW3vXDBmGEXY/euCb5+mCkUOSsk+k0j6YysIdlEqJ6zfc1Efx4kyHW7iyUhurjgQs21h1RLet3BH3AFF7rDXs952GtGFPvaqPtITtKKNt8E7ugCOlLZq8RpvPRNZ1su2tbw56NqTDMEy1KSuFOzsTr5qjLRHX+favd0pK7CDgpkEH8Qm1b3WXqH7DS31UdwQ3SZaoAaJr0E3z+Uyt/+oHB/T18WeE/FtmG7yTO+BQabvz0tE61truD1DM5DWWfDopGLBCpMC55mjy8hOszqzSdRbIjEQOAk4YdKw4aOm5jVi36eQDqHhF2rekxPQbXuujuiO4STKrB4hgg+6gzHQ1trarM8zvfrxqj368ak/QATqaBu+EDjiUYGnbsKe+1zXtrTXHe/22Z15jyadTggGrBoURgzPDfl88JDmdoJmj3FD6SPrS6DxJoe+56SqjaaPzwl4mkBI3CNg96Fhx0BKqbzre2h7VNp18AGWVSPtWd1b2G07poxKBeW6SzOoBItig29jabnqn7xqguzPT4KW/dcA9O//uHbBdQqWtp+21vQOb7mqOtsScT7uDgYbWk5qzbIsufni95j21VTMfWqc5y7aosdvgEo2ReQM0bXRer7ODaT6fpo3OS1onaOYoN5Qv/XVQXFJe6r+xuMvUUblaUl7q/3+wdUIxcykrGmb3wUQJF8zHs43jPdqemW1akRanC7VvBWNlv2F3H5VIBDdJZuUAEWrQ7dRfOpHH/vl8nXtG+Juugg3QZhu83R1wOGYHwEg3nRYPOT3mfNodDCRiUDATFCRaNEe5Xc49I1uv3jpVK+ZPUU5munIy07Vi/hStvWuGnpo3WWvvmuH/rkv3dSq+eV7Y7Vs9CNg56Fhx0GL24CLSNp18AGW1SMF0IvoNu/uoROKylA2WlJfqtpU7Ak6zxjJARBp0M9LT9Pptf6fqIy169YMD+vGqPSHX7X76savBRzpt7+SoP9oBsI8UcBmve14j3X8RLp9W1XW0EnVZo2vArz7SopqjLbbc/xCqfXaX5vPp/BEDdcvMUWHTWJIbOf1d67y1sy7iPmEVs/tgIlhxqSLas2uhtunlyyY9dd+3/nigUcvfq9HWbmeWE9Vv2NVHJRrBjQ2sGiDMBhcluafra+OGhQ1ueg7QZhq8nR1wJGYGwO4mjhgUsiOJJ592BQOJHhTMBAWJFKx9dtdVf1bek5HsQcCuQceKg5ZoDy5CbdPJB1CJ0rVvXTl+WFL6DSccsCQC89y4XDTzjsQyR0mkBt/Y2t6rA3bKzX7B0tZT9/yHy6uT8xlMpPmO1t41wxMdWPc6k5SUzjnZg4Adg44V8xlFmrfF7DadNrcS7MMkfmF4LbiJZtBN5ADt5Ki/K
21DMvvpod98FFf+nZzPnhgUECsr+opg24jlaSm3HVggcQhuwvBacNMlmkHXTQN0IqRK/hkUEC8r9pWe24h1m6my3yI0gpswvBrcAKEwKADwAl6/AMDP7pt/ASDZmOcGAAB4CsENAADwFIIbAADgKQQ3AADAUwhuAACApxDcAAAATyG4AQAAnmJrcFNRUaHJkycrKytL+fn5mj17tnbv3h32N8uXL5fP5wv49O/fP0kpBgAATmdrcLN+/XotWLBAv//977Vq1Sq1t7fr0ksvVUtLS9jfZWdn6+DBg/5PbW1tklIMAACcztYZit9+++2A/y9fvlz5+fnavn27pk2bFvJ3Pp9PBQUFiU4eAABwIUfdc9PY2ChJGjx4cNj1mpubNWLECBUVFemqq67SH/7wh5DrtrW1qampKeADAAC8yzHBTWdnp+644w5NnTpV5557bsj1xowZoyeffFKvvPKKnnnmGXV2duqiiy7SJ598EnT9iooK5eTk+D9FRUWJygIAAHAAx7wV/Oabb9Zbb72ld999V2eeeabp37W3t+vss89WeXm5/uM//qPX921tbWpra/P/v6mpSUVFRbwVHAAAF3HdW8FvvfVWvf7669qwYUNUgY0kpaenq7S0VFVVVUG/z8jIUEZGhhXJBAAALmDrZSnDMHTrrbfqpZde0po1a1RSUhL1Njo6OrRz504VFhYmIIUAAMBtbD1zs2DBAj333HN65ZVXlJWVpbq6OklSTk6OTjvtNEnSnDlzdMYZZ6iiokKS9IMf/EAXXnihRo0apYaGBj344IOqra3VDTfcYFs+AACAc9ga3CxdulSSNGPGjIDlTz31lK677jpJ0r59+9Snz99OMB0/flw33nij6urqNGjQIE2cOFHvvfeexo4dm6xkAwAAB3PMDcXJEs0NSQAAwBmiGb8d8yg4AACAFQhuAACApxDcAAAATyG4AQAAnkJwAwAAPIXgBgAAeArBDQAA8BSCGwAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ5CcAMAADyF4AYAAHgKwQ0AAPAUghsAAOApBDcAAMBTCG4AAICnENwAAABPIbgBAACeQnADAAA8heAGAAB4CsENAADwFIIbAADgKQQ3AADAUwhuAACApxDcAAAATyG4AQAAnkJwAwAAPIXgBgAAeArBDQAA8BSCGwAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ5CcAMAADyF4AYAAHgKwQ0AAPAUghsAAOApBDcAAMBTCG4AAICnENwAAABPIbgBAACeQnADAAA8heAGAAB4iq3BTUVFhSZPnqysrCzl5+dr9uzZ2r17d8TfvfDCCzrrrLPUv39/nXfeeXrzzTeTkFoAAOAGtgY369ev14IFC/T73/9eq1atUnt7uy699FK1tLSE/M17772n8vJyzZ8/Xzt27NDs2bM1e/Zs7dq1K4kpBwAATuUzDMOwOxFd6uvrlZ+fr/Xr12vatGlB17nmmmvU0tKi119/3b/swgsv1IQJE/TYY49F/BtNTU3KyclRY2OjsrOzLUs7AHvsrW9W7bFWFQ85XSW5pyf99wCSI5rxu2+S0mRKY2OjJGnw4MEh19m0aZMWLVoUsOyyyy7Tyy+/HHT9trY2tbW1+f/f1NQUf0KTiI7XPair5GpoPamFKyu1YU+9f9m00XlaUl6qnMz0hP9eck6dOyUdiI5X6s2J+XBMcNPZ2ak77rhDU6dO1bnnnhtyvbq6Og0dOjRg2dChQ1VXVxd0/YqKCn3/+9+3NK3JYEXHaxcnNvREcnNdhWOmHu2s64UrK7Wx6kjAso1VR3Tbyh1aMX9KQn/vlDp3Sjrcwil9k1fqzcn5cExws2DBAu3atUvvvvuupdtdvHhxwJmepqYmFRUVWfo3EiHejtsOTmnoye7A7KyrcHmNtRzM1KPddb23vjngb3fpMAxt2FOv6iMtYfMc7++D1fm7VfW6YcVWvXDTRVHkJD529xNOCRYisbu99mR3vVnFyflwRHBz66236vXXX9eGDRt05plnhl23oKBAhw4dClh26NAhFRQUBF0/IyNDGRkZlqU1GeLteO1id0O3owOzq67C5dWQEVc5mKlHu+u69lhr2O9rjoYv93h+H6rOOw1pa81xXb30PT0xd3LEsrbiXiG7+olo9jUnBEB2t9fuElVvyS5np49Ttj4tZRiGbr31Vr300ktas2aNSkpKIv6mrKxMq1evDli2atUqlZWVJSqZSWem43Warobe0eP+9O4NPdHCdWCJYlddhctrPOVgph6dUNcjBmeG/b54SPhONZ7fR6rz7bXHw5Z1Q+tJzVm2RRc/vF7zntqqmQ+t05xlW9TY2h52u9GmI5H9hJk2ZlU+4+WE9tqd1fVmVzk7fZyyNbhZsGCBnnnmGT333HPKyspSXV2d6urq9Nlnn/nXmTNnjhYvXuz//+233663335bDz/8sP785z/re9/7nrZt26Zbb73VjiwkRLwdtx3sbuh2dWB21FWkvMZTDmbq0e66lqSReQM0bXSe0ny+gOVpPp+mjc6LeMQYz+8j1XmnFLasrQrC7eonzO5rdhxsBOOE9tqd1fVmVzk7fZyyNbhZunSpGhsbNWPGDBUWFvo/v/jFL/zr7Nu3TwcPHvT//6KLLtJzzz2nxx9/XOPHj9eLL76ol19+OexNyG4Tb8dtB7sbul0dmB11FSmv4UQqBzP1aHddd1lSXqqpo3IDlk0dlasl5aUJ/X1XnffxhV0taFlbGYTb1U+Y2decdLbEKe21i5X1Zmc5O32csvWeGzNT7Kxbt67XsquvvlpXX311AlLkHEvKS3Xbyh0B1zSj6biTrauhb6w6ErCjpfl8mjoqN+EN3c4OLNl1FSmv4UQqB7P1aGddd8nJTNeK+VNUfaRFNUdbor7XIJ7fLykv1Q0rtmprzfGQ6wQr63jvFQqWjmT3E2b2tUhBdLT5jIfdfVMwVtWb1e0pWk4epxxxQzF6i7fjtoOdDd3ODizZdRUpr5LiKgcz9eikTq0kN77yjuX3OZnpeuGmi3T10ve0vfa4Ort9F66srQ7C7egnzOxrkQ5ck322xEntVbKu3uw+K+XkccpRMxQnAzMUJ55dDb2xtb1XB+aUOResFi6vkiwpBzP16MROLZliaXNzlm0JGRjY/fisWWby7cR8erG9OrGcEyWa8ZvgBp7jxQ4slHB5TaVysFs0Ze2lIDxcvr2UTydLpXImuAmD4AaAE6RK8Jkq+bRbKpQzwU0YBDcAALhPNOO3rY+CAwAAWI3gBgAAeArBDQAA8BSCGwAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ5CcAMAADyF4AYAAHgKwQ0AAPCUqIObkSNH6ujRo72WNzQ0aOTIkZYkCgAAIFZRBzc1NTXq6OjotbytrU0HDhywJFEAAACx6mt2xVdffdX/73feeUc5OTn+/3d0dGj16tUqLi62NHEAAADRMh3czJ492//vuXPnBnyXnp6u4uJiPfzww5YlDAAAIBamgpsPP/xQ7e3tSktLU0lJibZu3arc3NxEpw0A
ACBqpu65KS0t1bFjxyRJPp9PPp8voYkCAACIlangZuDAgdq7d68kqba2Vp2dnQlNFAAAQKxMXZb61re+penTp6uwsFCSNGnSJKWlpQVdtysIAgAAsIOp4Obxxx/XN7/5TVVVVWnhwoW68cYblZWVlei0AQAARM3001KXX365JGn79u26/fbbCW4AAIAjRT2JX6ibiVtaWnT99dfHnSAAAIB4RB3cPP300/rss896Lf/ss8+0YsUKSxIFWGlvfbPW7j6s6iMtdicloZKdz1QpV5hHm4BTmL4s1dTUJMMwZBiGTpw4of79+/u/6+jo0Jtvvqn8/PyEJBKIRUPrSS1cWakNe+r9y6aNztOS8lLlZKbbmLLg9tY3q/ZYq4qHnK6S3NNN/y7Z+XRbucYi1rrwGrPlkAptws1SsT37DMMwzKzYp0+fsPPb+Hw+ff/739c999xjWeISoampSTk5OWpsbFR2drbdyUE3Vu+Ac5Zt0caqI+ro1sTTfD5NHZWrFfOnxL39UKLNR7wDQ7LzafbvubFDTYVB2ky9RFsOdu1rqS5SXXqtPUczfps+c7N27VoZhqGLL75YL774ooYMGeL/rl+/fhoxYoROnToVe6qRshKxA+6tbw7YXpcOw9CGPfWqPtJi+YAbaz4WrqzUxqojAcs2Vh3RbSt3RBwYkp1PM39vUGa6azvUW559X+99fDRg2YY99br52e167sYLbUqVNaJpn9G0STv2tVRnti7j6VvczvQ9N9OnT9eMGTMkSWVlZZo+fbr/U1ZWpoyMDJWUlCQqnfCwcDtgrGqPtYb9vuao9fcExJKProGho8cJ1O4DQzjJzqeZv5eI+kyGvfXNvQKbLu99fNT195GYrZdo26Qd+1qqM1OX8fYtbhf1DcWS1Ldv7xM+zc3NAffhAGYkagccMTgz7PfFQ6w9kow1H/EODMnOZ6S/l+aTazvUzdXBAxv/93vDf+9k0bTPaNtksttgqjNbl6kedJq+LLVo0SJJf7m35rvf/a4yM//WoDs6OrR582ZNmDDB8gTC28zsgLGc0h6ZN0DTRueFvA/A6tPkseYj3oEh2fmM9Pc6ItzBF2t9Jkf4d+aZujnRoaJpn9G2yWS3wVRnti5TPeg0feZmx44d2rFjhwzD0M6dO/3/37Fjh/785z9r/PjxWr58eQKTCi9K5A64pLxUU0cFvr1+6qhcLSkvjXmbocSaj66BIa3HzfppPp+mjc4zNTAkM5+R/p6bO9QLSgaH/f7CkUPCfu9k0dRLLG0y2W0wlZmtSyv6FjeL6oZiSZo3b54eeeQRnjSCJRJ51JeTma4V86eo+kiLao62JPSpnXjysaS8VLet3BFwc2A0A0My8xnp7+Vkprv2KH5k3gCVjRyiTUEuP5WNHOLotEcSbfuMtk0muw2msmjqMt6+xc1MPwruFTwK7jyNre29dkC3PF3TXbz58MrA4Ob6dGvazTzeHUvevNIm7Wb1tAjR1qVX6jGa8ZvgBo7hlR3QK/mIl5vLwS1pj2X6AbfkzQsSPc9MqtUlwU0YBDcAvILJ85yN+rFWNON3TI+CAwDslerzmDgd9WMvghsAcKFUn8fE6agfexHcAIALufmx+1RA/diL4AYAXCjV5zFxOurHXgQ3AOBSTJ7nbNSPfXhaCgBcLtUeCXYb6sca0YzfpmcoBgA4U0kug6aTUT/JZ+tlqQ0bNuhrX/uahg0bJp/Pp5dffjns+uvWrZPP5+v1qaurS06CAQCA49ka3LS0tGj8+PF69NFHo/rd7t27dfDgQf8nPz8/QSkEAABuY+tlqSuuuEJXXHFF1L/Lz8/XwIEDrU8QAABwPVc+LTVhwgQVFhbqkksu0caNG8Ou29bWpqampoAPAADwLlcFN4WFhXrsscf0q1/9Sr/61a9UVFSkGTNm6P333w/5m4qKCuXk5Pg/RUVFSUwxAABINsc8Cu7z+fTSSy9p9uzZUf1u+vTpGj58uP7v//4v6PdtbW1qa2vz/7+pqUlFRUU8Cg4AgIuk1KPgU6ZM0bvvvhvy+4yMDGVkZCQxRQAAwE6uuiwVTGVlpQoLC+1OBgAAcAhbz9w0NzerqqrK///q6mpVVlZq8ODBGj58uBYvXqwDBw5oxYoVkqSf/OQnKikp0TnnnKPPP/9cTzzxhNasWaPf/OY3dmUBAAA4jK3BzbZt2zRz5kz//xctWiRJmjt3rpYvX66DBw9q3759/u9PnjypO++8UwcOHFBmZqbGjRun3/72twHbAAAAqc0xNxQnC++WAgDAfaIZv11/zw0AAEB3BDcAAMBTCG4AAICnENwAAABPIbgBAACeQnADAAA8heAGAAB4CsENAADwFIIbAADgKQQ3AADAUwhuAACApxDcAAAATyG4AQAAnkJwAwAAPIXgBgAAeArBDQAA8BSCGwAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ5CcAMAADyF4AYAAHgKwQ0AAPAUghsAAOApBDcAAMBTCG4AAICnENwAAABPIbgBAACe0tfuBACpaG99s2qPtap4yOkqyT3d7uQAgKcQ3KQoBld7NLSe1MKVldqwp96/bNroPC0pL1VOZrqNKQMQCv2l+xDcpBgGV3stXFmpjVVHApZtrDqi21bu0Ir5U2xKFYBg6C/di3tuUky4wRWJtbe+WRv21KvDMAKWdxiGNuypV/WRFptSFru99c1au/uwK9OO+KRC3dNfuhdnblJI1+DaU/fBlVOuiVN7rDXs9zVH3VP+HNGmrlSpe/pLd+PMTQoxM7gicUYMzgz7ffEQ93SUHNGmrlSpe/pLdyO4SSFeGlzdaGTeAE0bnac0ny9geZrPp2mj81xzFOjFy2swJ5Xqnv7S3QhuUohXBlc3W1JeqqmjcgOWTR2VqyXlpTalKHoc0aauVKp7+kt3456bFLOkvFS3rdwRcC3ZbYOrm+VkpmvF/CmqPtKimqMtrny0lCPa1JVqdU9/6V4ENynGC4OrF5Tkurfcu45oN1YdCbg8kebzaeqoXNfmC5GlWt3TX7qXzzB6XDz1uKamJuXk5KixsVHZ2dl2JwdwpcbW9l5HtF58Yga9UfewSzTjN8FNkjHTpTmUkzl2lxNHtKmLukeyRTN+c1kqSVJlboh4UU7mOKWc3Hx5DfGh7uFkPC2VJKkyN0S8KCdzKCcACI3gJglSaW6IeFBO5lBOABAewU0SpNLcEPGgnMyhnMxLhfcfAeiNe26SINXmhogV5WQO5RSZU+5JAmAPW8/cbNiwQV/72tc0bNgw+Xw+vfzyyxF/s27dOp1//vnKyMjQqFGjtHz58oSnM17MdGkO5WSOleXk1TMb3JMEpDZbg5uWlhaNHz9ejz76qKn1q6urdeWVV2rmzJmqrKzUHXfcoRtuuEHvvPNOglMaPy9Mu58MlJM58ZZTQ+tJzVm2RRc/vF7zntqqmQ+t05xlW9TY2p6I5CYV9yQBcMw8Nz6fTy+99JJmz54dcp27775bb7zxhnbt2uVf9o//+I9qaGjQ22+/berv2D3PDXNDmEM5mRNrOc1ZtiXkLLMr5k9JRFKTZu3
uw5r31NaQ3z81b7JmjslPYooAWMGz89xs2rRJs2bNClh22WWX6Y477gj5m7a2NrW1tfn/39TUlKjkmeKEuSHsnvjNDCeUkxvEUk5dZzZ66n5mw81lzz1JAFwV3NTV1Wno0KEBy4YOHaqmpiZ99tlnOu2003r9pqKiQt///veTlURH4yZLSOaetnJzcJNq7z8C0JvnHwVfvHixGhsb/Z/9+/fbnSTbcJMlpNQ4s8G9W0Bqc9WZm4KCAh06dChg2aFDh5SdnR30rI0kZWRkKCMjIxnJc6y99c3aXH3U05ciYF4qnNngbc5AanNVcFNWVqY333wzYNmqVatUVlZmU4qcLdhlqFDcfikC0VlSXtrrzc5ePLPBvVtAarI1uGlublZVVZX//9XV1aqsrNTgwYM1fPhwLV68WAcOHNCKFSskSTfddJN+9rOf6d/+7d90/fXXa82aNfrlL3+pN954w64sOFqwy1CheOFSBMyz4syGG25MB5CabA1utm3bppkzZ/r/v2jRIknS3LlztXz5ch08eFD79u3zf19SUqI33nhD//Iv/6JHHnlEZ555pp544glddtllSU+704V6IqYnL12KQPRiObPBjempjaAWbuCYeW6Sxe55bpIl0lwfXRiUEC0vz5GD0AhqYTfPznMD8yI9EVPxzfN04cghHHkhKl6fIwehhXvakqAWTuP5R8FTVaT3D5VPGc4ghKjxRvLUxCst4DYENx7GXB+wWirMkYPeCGrhNlyW8jDm+oDVUmGOHPRGUAu34cxNCijJPV0zx+Qz8MASnBFMPZEuc9O3wGl4WgpATDgjmFoaW9t7TfzI01JIpmjGb4IbAIBpBLWwC4+CAw7FBGhwO15pERz7trMQ3ABJwARogDexbzsTNxQDPeytb9ba3Yctnbsj3ARoTpKIvANe5pZ9O9Vw5gb4q0QdgblhVl+OPhOPyxbOYGU9uGHfTlUEN8BfJWp6eTMToNndATK1fuIQODpDIurBDft2quKyFKDETi/v9AnQmFo/sbhs4QyJqAen79upjOAGMfHavRmJnF7e6ROgMbV+4hA4OkOi6sHp+3YqI7hJQfEEJg2tJzVn2RZd/PB6zXtqq2Y+tE5zlm1RY2t7AlKaPIk+AnPyrL6R8t63jy/s9wiNwDGyWPqjaH+TyHpw8r6dyrjnJoV8sP+47nlpl3Z92uRfFu01Z6/em5HodyYl4j1fVt0YGSrvXf7fsi3cIxIjLluEFss9MLHeN5PIekjld/g5+SZ5ZihOAcE6hC5dg7eZwGRvfbMufnh9yO/X3jXDcQ08Gm6ZXj4RN0YGy3t30bQTBJqzbEvIoDmVyzOWcomnLGP9rZMHcLvYdZN8NOM3l6VSwMKVlXq3KvigFc01Z6+fYu86Alt71ww9NW+y1t41QyvmT3FUYCMl5sbIrryvuD54J889IrHjskVvsdwDE+99M9HWg1cvwVvBDTfJc1nK40LNw9CTmUcWU+UUu5Onl0/0vBrBLkt1l0qPtlp1xH60pU3zvlSsG6eV6FSnEXR7qXZ2IJZHqON97Dray0devQQfjWDt0kwfZBiG7e2Z4MbjInUIXcwEJom+LwWRJXpejVQJYMOx6pR7uO1Y/bfcJpZ2ZlXbNHPwkuqT84Vrl5H6oNtWvq9dB2K/r9MqXJbyuEgdQh8pqkcWOcVur0QHHzzaat0pdzPbccPp/USIpZ0ls216/RJ8JOHaZaQ+6I/dHljp/rtkI7jxuFAdQpcv9TiSjMQt96V4VTI6+FQOYK2aD8XMdlJ9DpxY2lmy2mYqn8GM1C59f+1revZBXcFEZ48r23a1Zy5LpYAl5aW9noQ594xs3f+N8zTuzIExbdPJ96V4XbD6tLKDT+VHW6267GfFkb/X72+KpZ0lq2265RJ8Iu7VMtN2g/VBY4dlB0wzEux3ySw3gpsUkMqDlRclqz5TMYC16ojdzHYizcLh5bMD3cXSzpLRNhN9EBGPRN6rZabtBuuDDMMIO1VIstszwU0KScXBysuoT+tZdcRudjtuODuQqpx8UJjIJ7mi2Qd69kFOas/ccwMA3Vh1X4eZ7aTy/U1uUZJ7umaOyXdMYJOMe7VibZdOas/MUAwAQVh1xG5mO048OwBnWrv7sOY9tTXk90/Nm6yZY/It+VuxtstEtedoxm8uSwFAEFZd9jOzHS4xwqxkPskVa7t0QnvmshQAIKxY3tyNxGAuKnM4cwMACCpVZ1B2Oic/yeUU3HMDAAiKN5o7W6rdq8VbwQEAcUn1GZTdIMXOTUSFy1IAgF4S/ZLWZHPrm9eDpZvLhZER3AAAevHK+5XcGgiES3ciJ/HzCi5LAQB68cpTOW5983qodN+wYiuXC00guAEABOWkGWdj4db7hsKle2vN8bC/NfNS1lTAZSkAQFBOfr+SGW69byhSusNxy+XCRCO4AQCE5YQZZ2Ph1vuGIqV78ohBen9fgyNeUOlUXJYCAHiSW+8bipTuJ+ZOdvXlwmRgEj8AgGc1trb3ms3XDU9LmUm3Wy8Xxiqa8ZvgBgDgeW4NBNya7kTgreAAAHTj1vuG3Jpuu3HPDQAA8BSCGwAA4CkENwAAwFMcEdw8+uijKi4uVv/+/XXBBRdoy5YtIdddvny5fD5fwKd///5JTC0AAHAy24ObX/ziF1q0aJHuu+8+vf/++xo/frwuu+wyHT58OORvsrOzdfDgQf+ntrY2iSkGAABOZntw89///d+68cYbNW/ePI0dO1aPPfaYMjMz9eSTT4b8jc/nU0FBgf8zdOjQJKYYAAA4ma3BzcmTJ7V9+3bNmjXLv6xPnz6aNWuWNm3aFPJ3zc3NGjFihIqKinTVVVfpD3/4QzKSCwAAXMDW4ObIkSPq6OjodeZl6NChqqurC/qbMWPG6Mknn9Qrr7yiZ555Rp2dnbrooov0ySefBF2/ra1NTU1NAR8AAOBdtl+WilZZWZnmzJmjCRMmaPr06fr1r3+tvLw8/c///E/Q9SsqKpSTk+P/FBUVJTnFAJAce+ubtXb3YVUfabE7KYCtbJ2hODc3V2lpaTp06FDA8kOHDqmgoMDUNtLT01VaWqqqqqqg3y9evFiLFi3y/7+pqYkAB4CnNLSe1MKVla57fxKQKLaeuenXr58mTpyo1atX+5d1dnZq9erVKisrM7WNjo4O7dy5U4WFhUG/z8jIUHZ2dsAHALxk4cpKbaw6ErBsY9UR3bZyh00pAuxl+7ulFi1apLlz52rSpEmaMmWKfvKTn6ilpUXz5s2TJM2ZM0dnnHGGKioqJEk/+MEPdOGFF2rUqFFqaGjQgw8+qNraWt1www12ZgMAbLG3vjngjE2XDsPQhj31qj7SwruJkHJsD26uueYa1dfX695771VdXZ0mTJigt99+23+T8b59+9Snz99OMB0/flw33nij6urqNGjQIE2cOFHvvfeexo4da1cWAMA2tcdaw35fc5TgBqnHZxiGYXcikimaV6YDgNPtrW/WxQ+vD/n92rtmENzAE6IZv133tBQA4G9G5g3QtNF5SvP5Apan+XyaNjqPwAYpieAGAFxuSXmppo7KDV
g2dVSulpSX2pQiwF6233MDAIhPTma6VsyfouojLao52qLiIadzxgYpjeAGADyiJJegBpC4LAUAADyG4AYAAHgKwQ0AAPAUghsAAOApBDcAAMBTCG4AAICnENwAAABPYZ4bAACitLe+WbXHWpkw0aEIbuBadC4Akq2h9aQWrqzUhj31/mXTRudpSXmpcjLTbUwZuiO4gevQuQCwy8KVldpYdSRg2caqI7pt5Q6tmD/FplShJ+65geuE61wAIFH21jdrw556dRhGwPIOw9CGPfWqPtJiU8rQE8ENXIXOBYBdao+1hv2+5ij9j1MQ3MBV6FwA2GXE4Myw3xcP4d4/pyC4gavQuQCwy8i8AZo2Ok9pPl/A8jSfT9NG5/Fgg4MQ3MBV6FwA2GlJeammjsoNWDZ1VK6WlJfalCIE4zOMHjcveFxTU5NycnLU2Nio7Oxsu5ODGDS2tuu2lTt4WgoQUyLYpfpIi2qOtlDuSRTN+E1wA9eic0EqY0oEpJpoxm8uS8G1SnJP18wx+QQ2SElMiQCERnADAC7DlAhAeAQ3AOAyTIkAhEdwAwAuw5QIQHgENwDgMkyJAIRHcAMALsR8K0BovBUcAFwoJzNdK+ZPYUoEIAiCGwBwsZJcghqgJy5LAQAATyG4AQAAnkJwAwAAPIXgBgAAeArBDQAA8BSCGwAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ6Scq9fMAxDktTU1GRzSgAAgFld43bXOB5OygU3J06ckCQVFRXZnBIAABCtEydOKCcnJ+w6PsNMCOQhnZ2d+vTTT5WVlSWfz2d3cpKmqalJRUVF2r9/v7Kzs+1OTsqg3O1BuduDcrdHqpS7YRg6ceKEhg0bpj59wt9Vk3Jnbvr06aMzzzzT7mTYJjs729ON36kod3tQ7vag3O2RCuUe6YxNF24oBgAAnkJwAwAAPIXgJkVkZGTovvvuU0ZGht1JSSmUuz0od3tQ7vag3HtLuRuKAQCAt3HmBgAAeArBDQAA8BSCGwAA4CkENy62dOlSjRs3zj+3QVlZmd566y3/959//rkWLFigIUOGaMCAAfrWt76lQ4cOBWxj3759uvLKK5WZman8/Hz967/+q06dOpXsrLjaAw88IJ/PpzvuuMO/jLK33ve+9z35fL6Az1lnneX/njJPnAMHDuif//mfNWTIEJ122mk677zztG3bNv/3hmHo3nvvVWFhoU477TTNmjVLe/bsCdjGsWPHdO211yo7O1sDBw7U/Pnz1dzcnOysuEZxcXGv9u7z+bRgwQJJtPeIDLjWq6++arzxxhvGRx99ZOzevdv493//dyM9Pd3YtWuXYRiGcdNNNxlFRUXG6tWrjW3bthkXXnihcdFFF/l/f+rUKePcc881Zs2aZezYscN48803jdzcXGPx4sV2Zcl1tmzZYhQXFxvjxo0zbr/9dv9yyt569913n3HOOecYBw8e9H/q6+v931PmiXHs2DFjxIgRxnXXXWds3rzZ2Lt3r/HOO+8YVVVV/nUeeOABIycnx3j55ZeNDz74wPj6179ulJSUGJ999pl/ncsvv9wYP3688fvf/9743e9+Z4waNcooLy+3I0uucPjw4YC2vmrVKkOSsXbtWsMwaO+RENx4zKBBg4wnnnjCaGhoMNLT040XXnjB/92f/vQnQ5KxadMmwzAM48033zT69Olj1NXV+ddZunSpkZ2dbbS1tSU97W5z4sQJY/To0caqVauM6dOn+4Mbyj4x7rvvPmP8+PFBv6PME+fuu+82vvSlL4X8vrOz0ygoKDAefPBB/7KGhgYjIyPDWLlypWEYhvHHP/7RkGRs3brVv85bb71l+Hw+48CBA4lLvIfcfvvtxhe+8AWjs7OT9m4Cl6U8oqOjQ88//7xaWlpUVlam7du3q729XbNmzfKvc9ZZZ2n48OHatGmTJGnTpk0677zzNHToUP86l112mZqamvSHP/wh6XlwmwULFujKK68MKGNJlH0C7dmzR8OGDdPIkSN17bXXat++fZIo80R69dVXNWnSJF199dXKz89XaWmp/vd//9f/fXV1terq6gLKPicnRxdccEFA2Q8cOFCTJk3yrzNr1iz16dNHmzdvTl5mXOrkyZN65plndP3118vn89HeTSC4cbmdO3dqwIABysjI0E033aSXXnpJY8eOVV1dnfr166eBAwcGrD906FDV1dVJkurq6gIaftf3Xd8htOeff17vv/++Kioqen1H2SfGBRdcoOXLl+vtt9/W0qVLVV1drb/7u7/TiRMnKPME2rt3r5YuXarRo0frnXfe0c0336yFCxfq6aeflvS3sgtWtt3LPj8/P+D7vn37avDgwZS9CS+//LIaGhp03XXXSaKPMSPlXpzpNWPGjFFlZaUaGxv14osvau7cuVq/fr3dyfK0/fv36/bbb9eqVavUv39/u5OTMq644gr/v8eNG6cLLrhAI0aM0C9/+UuddtppNqbM2zo7OzVp0iTdf//9kqTS0lLt2rVLjz32mObOnWtz6lLDsmXLdMUVV2jYsGF2J8U1OHPjcv369dOoUaM0ceJEVVRUaPz48XrkkUdUUFCgkydPqqGhIWD9Q4cOqaCgQJJUUFDQ6+76rv93rYPetm/frsOHD+v8889X37591bdvX61fv14//elP1bdvXw0dOpSyT4KBAwfqi1/8oqqqqmjvCVRYWKixY8cGLDv77LP9lwS7yi5Y2XYv+8OHDwd8f+rUKR07doyyj6C2tla//e1vdcMNN/iX0d4jI7jxmM7OTrW1tWnixIlKT0/X6tWr/d/t3r1b+/btU1lZmSSprKxMO3fuDOh0Vq1apezs7F6dGf7my1/+snbu3KnKykr/Z9KkSbr22mv9/6bsE6+5uVkff/yxCgsLae8JNHXqVO3evTtg2UcffaQRI0ZIkkpKSlRQUBBQ9k1NTdq8eXNA2Tc0NGj79u3+ddasWaPOzk5dcMEFSciFez311FPKz8/XlVde6V9GezfB7juaEbtvf/vbxvr1643q6mrjww8/NL797W8bPp/P+M1vfmMYxl8eFRw+fLixZs0aY9u2bUZZWZlRVlbm/33Xo4KXXnqpUVlZabz99ttGXl5eyjwqaKXuT0sZBmWfCHfeeaexbt06o7q62ti4caMxa9YsIzc31zh8+LBhGJR5omzZssXo27ev8V//9V/Gnj17jGeffdbIzMw0nnnmGf86DzzwgDFw4EDjlVdeMT788EPjqquuCvooeGlpqbF582bj3XffNUaPHs2j4BF0dHQYw4cPN+6+++5e39HewyO4cbHrr7/eGDFihNGvXz8jLy/P+PKXv+wPbAzDMD777DPjlltuMQYNGmRkZmYa3/jGN4yDBw8GbKOmpsa44oorjNNOO83Izc017rzzTqO9vT3ZWXG9nsENZW+9a665xigsLDT69etnnHHGGcY111wTMNcKZZ44r732mnHuuecaGRkZxllnnWU8/vjjAd93dnYa3/3ud42hQ4caGRkZxpe//GVj9+7dAescPXrUKC8vNwYMGGBkZ2cb8+bNM06cOJHMbLjOO
++8Y0jqVZaGQXuPhLeCAwAAT+GeGwAA4CkENwAAwFMIbgAAgKcQ3AAAAE8huAEAAJ5CcAMAADyF4AYAAHgKwQ0AAPAUghsAjjBjxgzdcccddicjKCenDUBvfe1OAADvmTFjhiZMmKCf/OQndifFEr/+9a+Vnp5udzIAmERwAwARDB482O4kAIgCl6UAWOq6667T+vXr9cgjj8jn88nn86mmpkbr16/XlClTlJGRocLCQn3729/WqVOnQm7njTfeUE5Ojp599llJ0v79+/UP//APGjhwoAYPHqyrrrpKNTU1AX939uzZeuihh1RYWKghQ4ZowYIFam9v96/z85//XKNHj1b//v01dOhQ/f3f/72pPPW8LFVcXKz7779f119/vbKysjR8+HA9/vjj0RUUgIQhuAFgqUceeURlZWW68cYbdfDgQR08eFDp6en6yle+osmTJ+uDDz7Q0qVLtWzZMv3nf/5n0G0899xzKi8v17PPPqtrr71W7e3tuuyyy5SVlaXf/e532rhxowYMGKDLL79cJ0+e9P9u7dq1+vjjj7V27Vo9/fTTWr58uZYvXy5J2rZtmxYuXKgf/OAH2r17t95++21NmzYt5nw+/PDDmjRpknbs2KFbbrlFN998s3bv3h3z9gBYh8tSACyVk5Ojfv36KTMzUwUFBZKke+65R0VFRfrZz34mn8+ns846S59++qnuvvtu3XvvverT52/HWY8++qjuuecevfbaa5o+fbok6Re/+IU6Ozv1xBNPyOfzSZKeeuopDRw4UOvWrdOll14qSRo0aJB+9rOfKS0tTWeddZauvPJKrV69WjfeeKP27dun008/XV/96leVlZWlESNGqLS0NOZ8fuUrX9Ett9wiSbr77rv14x//WGvXrtWYMWNi3iYAaxDcAEi4P/3pTyorK/MHJpI0depUNTc365NPPtHw4cMlSS+++KIOHz6sjRs3avLkyf51P/jgA1VVVSkrKytgu59//rk+/vhj///POeccpaWl+f9fWFionTt3SpIuueQSjRgxQiNHjtTll1+uyy+/XN/4xjeUmZkZU57GjRvn/7fP51NBQYEOHz4c07YAWIvLUgAco7S0VHl5eXryySdlGIZ/eXNzsyZOnKjKysqAz0cffaR/+qd/8q/X84kmn8+nzs5OSVJWVpbef/99rVy5UoWFhbr33ns1fvx4NTQ0xJTWcH8LgL0IbgBYrl+/furo6PD//+yzz9amTZsCApaNGzcqKytLZ555pn/ZF77wBa1du1avvPKKbrvtNv/y888/X3v27FF+fr5GjRoV8MnJyTGdrr59+2rWrFn60Y9+pA8//FA1NTVas2ZNnLkF4DQENwAsV1xcrM2bN6umpkZHjhzRLbfcov379+u2227Tn//8Z73yyiu67777tGjRooD7bSTpi1/8otauXatf/epX/ieUrr32WuXm5uqqq67S7373O1VXV2vdunVauHChPvnkE1Npev311/XTn/5UlZWVqq2t1YoVK9TZ2ck9MoAHEdwAsNxdd92ltLQ0jR07Vnl5eWpvb9ebb76pLVu2aPz48brppps0f/58fec73wn6+zFjxmjNmjVauXKl7rzzTmVmZmrDhg0aPny4vvnNb+rss8/W/Pnz9fnnnys7O9tUmgYOHKhf//rXuvjii3X22Wfrscce08qVK3XOOedYmXUADuAzup8nBgAAcDnO3AAAAE8huAGQ0vbt26cBAwaE/Ozbt8/uJAKIEpelAKS0U6dOBbzGoafi4mL17cuUYICbENwAAABP4bIUAADwFIIbAADgKQQ3AADAUwhuAACApxDcAAAATyG4AQAAnkJwAwAAPIXgBgAAeMr/B0WwkAd+9OVwAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cdf.plot.scatter(y='ttft', x='tokens_in')" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "f08d9085-d994-4754-a545-390cad1f4806", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAG2CAYAAACKxwc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArU0lEQVR4nO3de1TVdb7/8dcWBERh4w2UBKWyvIZW6g/1aDWOl/GUaWdyHLt5XDYVxnjNnNS0y1CtLpqZTh1DOyUz1ag5XWwcRUzFK2hW5l3BvKAxgqAiwvf3R8t92nLbbDbs/YHnY629Vvt74/1m5d4vPt/P9/u1WZZlCQAAwFANvF0AAABAdRBmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRvBpmNmzYoLvvvluRkZGy2WxauXKl03rLsjRr1iy1bt1ajRo10oABA3TgwAHvFAsAAHySV8NMQUGBYmNjtWDBgjLXv/LKK3rzzTe1aNEibd26VY0bN9agQYN06dKlWq4UAAD4KpuvPGjSZrNpxYoVuvfeeyX9PCoTGRmpyZMna8qUKZKk3NxcRUREaMmSJfrd737nxWoBAICv8Pd2AeU5cuSITp06pQEDBjiW2e129erVS2lpaeWGmcLCQhUWFjrel5SUKCcnR82bN5fNZqvxugEAQPVZlqXz588rMjJSDRpUfCLJZ8PMqVOnJEkRERFOyyMiIhzrypKYmKg5c+bUaG0AAKB2ZGVlqU2bNhVu47Nhxl3Tp0/XpEmTHO9zc3MVHR2trKwshYaGerEyZ0d/KlBmzgUFNLBpyse7de7iFY//jHceuk29b2jh8eN6yqPv79DWwzkq/sWZTj+bTb2ub6Z3Hrrdi5UBALwtLy9PUVFRCgkJqXRbnw0zrVq1kiSdPn1arVu3diw/ffq0unXrVu5+gYGBCgwMLLU8NDTUp8LMLaGhuiVGemjxNhVYgWoQGODWcTpGNNHe0/mllsdd31yDu19f3TJrzOEz+dpy/KIU0MhpFrolacvxi/rpsp9iWjT2VnkAAB/hyhQRn73PTExMjFq1aqW1a9c6luXl5Wnr1q2Ki4vzYmWec/hMvjYcOOM0MlFV4+9qr37tWzot69e+pRY9cFt1y6tRx3IuVLj+6E8FtVQJAMB0Xh2Zyc/P18GDBx3vjxw5ol27dqlZs2aKjo7WhAkT9MILL6h9+/aKiYnRzJkzFRkZ6bjiyXSVfaG7otN1dr0fG6kjZwt09KcCtWve2KMjGofP5OtYzgWPH7dts+AK17drzqgMAMA1Xg0zO3bs0J133ul4f3Wuy8MPP6wlS5boqaeeUkFBgR599FGdO3dOffv21erVqxUUFOStkj2qsi/0ivjZbOpzYwtHwIhp4dmwce7CZSUk79KGA2ccy/q1b6n5o7rLHtyw2se/vmUT9WvfUpsOni01Z+aXfQEAUBmfuc9MTcnLy5Pdbldubq5X5sxUNLKRui9bDydtd+u4ngwWZXlo8bZyg8b7Y3t65GfkXijSk8kZNRaYAADmqsr3t89OADadKyMbu46fq9IxG9ikTpGhmj/q1hodubg6l+daxZalDQfO6MjZAo/8fHtwQ70/tmeNnSIDANQPPjsB2HQJybu06eBZp2WbDp7Vk8kZjvfd2oRVeIybIpy/2Pve2FIfjv1/Nf6FX9uTc2NaNNadN4cTZAAAbmFkpgZUNrLx9YEz+o/2LdX/5nA1DW6of18oKrVt0+CG+ufEO7wyasHkXACASRiZqQGVjWw8uHibHlq8TbkXirQqvq+aXjM/pGlwQ62K7yvJO6MWVyfn+l1zbb+fzaZ+7VsyggIA8ClMAK4Bh8/k667XUivc5trJtF8fOKP0zH/r1uim+o9r7hvjybpcvcyaybkAAG+qyvc3YaaGlHU1UFlSptxR4yMd1bnMmsm5AABvqMr3N6eZquHwmXyl7MvWkbOlJ8TOH9VdfW6s/LlItXGnW1cmI5eHybkAAF/HBGA3uDLScfWy4w37s/XQe+XfS6amJ9PW1mXWAAB4CyMzbqjKSEe/m8K9OpmWZyABAOo6wkwVlfdwyF+OdFyrrFNOfW5sofmjutdorRKXWQMA6j5OM1WRKyMd1462ePNOtzwDCQBQ1zEyU0XVGenw1mTaykaGKprIDACAr2NkpopMHOkob2To3IXLemjxNu4lAwAwGiMzbvDmHJjquHZkqDqXbAMA4CsYmXFDXXjaM5dsAwDqCsJMNcS0MC/EXOXORGYAAHwRp5nqKS7ZBgDUFYSZOsiVq5N4MjYAoK7gNFMdUtUHSs4f1b3Uk7FNmMgMAMAv8dTsOqSsJ3VfvWT8/bE9y93P5InMAIC6qSrf34zM1BHVuTrJ5InMAAAwZ8bHuXp3Xh4oCQCorxiZ8VFVnf/C1UkAgPqKkRkfVdW783J1EgCgviLM+KCr81+Kr5mb/cv5L2Ux9TELAABUB6eZfJC7d+etC49ZAACgqggzPqi681+4OgkAUJ9wmskHMf8FAADXEWZ8FPNfAABwDaeZfBTzXwAAcA1hxscx/wUAgIoRZmrR4TP5OpZzgVEWAAA8iDBTC6p6N18AAOA6JgDXgqrezRcAALiOMFPD3L2bLwAAcA1hpobxNGsAAGoWYaaG8TRrAABqFmGmhnE3XwAAahZhphZwN18AAGoOl2bXAu7mCwBAzSHM1CLu5gsAgOdxmgkAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBoPh1miouLNXPmTMXExKhRo0a64YYb9Pzzz8uyLG+XBgAAfIS/twuoyMsvv6yFCxdq6dKl6ty5s3bs2KExY8bIbrcrISHB2+UBAAAf4NNhZvPmzRo2bJiGDh0qSWrXrp2Sk5O1bds2L1cGAAB8hU+fZurdu7fWrl2r/fv3S5J2796tjRs3asiQIeXuU1hYqLy8PKcXAACou3x6ZObpp59WX
l6eOnToID8/PxUXF+vFF1/U6NGjy90nMTFRc+bMqcUqAQCAN/n0yMxHH32kDz/8UMuWLVN6erqWLl2qV199VUuXLi13n+nTpys3N9fxysrKqsWKAQBAbbNZPnxpUFRUlJ5++mnFx8c7lr3wwgv64IMP9MMPP7h0jLy8PNntduXm5io0NLSmSgUAAB5Ule9vnx6ZuXDhgho0cC7Rz89PJSUlXqoIAAD4Gp+eM3P33XfrxRdfVHR0tDp37qyMjAy9/vrr+u///m9vlwYAAHyET59mOn/+vGbOnKkVK1YoOztbkZGRGjVqlGbNmqWAgACXjsFpJgAAzFOV72+fDjOeQJgBAMA8dWbODAAAQGUIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABG8/kw8+OPP+qBBx5Q8+bN1ahRI3Xt2lU7duzwdlkAAMBH+Hu7gIr8+9//Vp8+fXTnnXfqyy+/VMuWLXXgwAE1bdrU26UBAAAf4dNh5uWXX1ZUVJSSkpIcy2JiYrxYEQAA8DU+fZpp1apVuv322/Xb3/5W4eHh6t69u959990K9yksLFReXp7TCwAA1F0+HWYOHz6shQsXqn379vrqq6/0+OOPKyEhQUuXLi13n8TERNntdscrKiqqFisGAAC1zWZZluXtIsoTEBCg22+/XZs3b3YsS0hI0Pbt25WWllbmPoWFhSosLHS8z8vLU1RUlHJzcxUaGlrjNQMAgOrLy8uT3W536fvbp0dmWrdurU6dOjkt69ixozIzM8vdJzAwUKGhoU4vAABQd/l0mOnTp4/27dvntGz//v1q27atlyoCAAC+xu0wc+jQIc2YMUOjRo1Sdna2JOnLL7/Ud99957HiJk6cqC1btujPf/6zDh48qGXLlumdd95RfHy8x34GAAAwm1thJjU1VV27dtXWrVu1fPly5efnS5J2796tZ5991mPF9ejRQytWrFBycrK6dOmi559/XnPnztXo0aM99jMAAIDZ3JoAHBcXp9/+9reaNGmSQkJCtHv3bl1//fXatm2bRowYoePHj9dErW6pygQiAADgG2p8AvCePXs0fPjwUsvDw8N19uxZdw4JAADgFrfCTFhYmE6ePFlqeUZGhq677rpqFwUAAOAqt8LM7373O02bNk2nTp2SzWZTSUmJNm3apClTpuihhx7ydI0AAADlcivM/PnPf1aHDh0UFRWl/Px8derUSf369VPv3r01Y8YMT9cIAABQrmrdATgzM1Pffvut8vPz1b17d7Vv396TtXkEE4ABADBPVb6/q/XU7OjoaEVHR1fnEAAAANXiVpixLEuffPKJUlJSlJ2drZKSEqf1y5cv90hxAAAAlXErzEyYMEF/+ctfdOeddyoiIkI2m83TdQEAALjErTDzv//7v1q+fLl+85vfeLoeAACAKnHraia73a7rr7/e07UAAABUmVthZvbs2ZozZ44uXrzo6XoAAACqxK3TTPfff7+Sk5MVHh6udu3aqWHDhk7r09PTPVIcAABAZdwKMw8//LB27typBx54gAnAAADAq9wKM59//rm++uor9e3b19P1AAAAVIlbc2aioqK4my4AAPAJboWZ1157TU899ZSOHj3q4XIAAACqxq3TTA888IAuXLigG264QcHBwaUmAOfk5HikOAAAgMq4FWbmzp3r4TIAAADc4/bVTAAAAL7A5TCTl5fnmPSbl5dX4bZMDgYAALXF5TDTtGlTnTx5UuHh4QoLCyvz3jKWZclms6m4uNijRQIAAJTH5TCzbt06NWvWTJKUkpJSYwUBAABUhcthpn///o7/jomJUVRUVKnRGcuylJWV5bnqAAAAKuHWfWZiYmJ05syZUstzcnIUExNT7aIAAABc5VaYuTo35lr5+fkKCgqqdlEAAACuqtKl2ZMmTZIk2Ww2zZw5U8HBwY51xcXF2rp1q7p16+bRAgEAACpSpTCTkZEh6eeRmT179iggIMCxLiAgQLGxsZoyZYpnKwQAAKhAlcLM1auYxowZo3nz5lV6P5njx48rMjJSDRq4dTYLAACgUm6ljKSkJJdujNepUyceRgkAAGpUjQ6ZWJZVk4cHAACo2TADAABQ0wgzAADAaIQZAABgtBoNM2XdWA8AAMCTmAAMAACMVqX7zFTV999/r8jIyJr8EQAAoJ5zOcyMGDHC5YMuX75ckhQVFVX1igAAAKrA5TBjt9trsg4AAAC3uBxmkpKSarIOAAAAt3BpNgAAMJrbE4A/+eQTffTRR8rMzNTly5ed1qWnp1e7MAAAAFe4NTLz5ptvasyYMYqIiFBGRoZ69uyp5s2b6/DhwxoyZIinawQAACiXW2Hm7bff1jvvvKP58+crICBATz31lNasWaOEhATl5uZ6ukYAAIByuRVmMjMz1bt3b0lSo0aNdP78eUnSgw8+qOTkZM9VBwAAUAm3wkyrVq2Uk5MjSYqOjtaWLVskSUeOHOGuvwAAoFa5FWbuuusurVq1SpI0ZswYTZw4Ub/+9a81cuRIDR8+3KMFAgAAVMRmuTGUUlJSopKSEvn7/3wx1F//+ldt3rxZ7du31x/+8AcFBAR4vFB35eXlyW63Kzc3V6Ghod4uBwAAuKAq399uhZnMzExFRUWVeiq2ZVnKyspSdHR0VQ9ZYwgzAACYpyrf326dZoqJidGZM2dKLc/JyVFMTIw7hwQAAHCLW2HGsqxSozKSlJ+fr6CgoGoXBQAA4Koq3QF40qRJkiSbzaaZM2cqODjYsa64uFhbt25Vt27dPFogAABARaoUZjIyMiT9PDKzZ88ep4m+AQEBio2N1ZQpUzxbIQAAQAWqFGZSUlIk/Xw59rx585hQCwAAvM6tB00mJSU5/vv48eOSpDZt2nimIgAAgCpwawJwSUmJnnvuOdntdrVt21Zt27ZVWFiYnn/+eZWUlHi6RgAAgHK5NTLzzDPPaPHixXrppZfUp08fSdLGjRs1e/ZsXbp0SS+++KJHiwQAACiPWzfNi4yM1KJFi3TPPfc4Lf/000/1xBNP6Mcff/RYgdXFTfMAADBPjd80LycnRx06dCi1vEOHDo4HUAIAANQGt8JMbGys3nrrrVLL33rrLcXGxla7KAAAAFe5NWfmlVde
0dChQ/Wvf/1LcXFxkqS0tDRlZWXpiy++8GiBAAAAFXH72Uz79+/X8OHDde7cOZ07d04jRozQvn371LZtW0/XCAAAUC63JgD7+fnp5MmTCg8Pd1r+008/KTw8XMXFxR4rsLqYAAwAgHlqfAJwefmHB00CAIDa5vaDJmfNmlXrD5p86aWXNH36dP3xj3/U3Llza+znAAAAcxjzoMnt27frL3/5i2655ZYaOT4AADCTEQ+azM/P1+jRo/Xuu+/qhRdeqJWfCQAAzODWnJmkpKRanUwbHx+voUOHasCAAZVuW1hYqLy8PKcXAACou9y6z0xt+utf/6r09HRt377dpe0TExM1Z86cGq4KAAD4CrdGZmpLVlaW/vjHP+rDDz90+Sqp6dOnKzc31/HKysqq4SoBAIA3uXWfmdqycuVKDR8+XH5+fo5lxcXFstlsatCggQoLC53WlYX7zAAAYJ6qfH/79GmmX/3qV9qzZ4/TsjFjxqhDhw6aNm1apUEGAADUfT4dZkJCQtSlSxenZY0bN1bz5s1LLQcAAPWTT8+ZAQAAqIxPj8yUZf369d4uAQAA+BBGZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABG8/kwk5iYqB49eigkJETh4eG69957tW/fPm+XBQAAfITPh5nU1FTFx8dry5YtWrNmjYqKijRw4EAVFBR4uzQAAOADbJZlWd4uoirOnDmj8PBwpaamql+/fpVun5eXJ7vdrtzcXIWGhtZChQAAoLqq8v3tX0s1eUxubq4kqVmzZmWuLywsVGFhoeN9Xl5erdQFAAC8w+dPM/1SSUmJJkyYoD59+qhLly5lbpOYmCi73e54RUVF1XKVAACgNhl1munxxx/Xl19+qY0bN6pNmzZlblPWyExUVBSnmQAAMEidPM00fvx4ffbZZ9qwYUO5QUaSAgMDFRgYWIuVAQAAb/L5MGNZlp588kmtWLFC69evV0xMjLdLAgAAPsTnw0x8fLyWLVumTz/9VCEhITp16pQkyW63q1GjRl6uDgAAeJvPz5mx2WxlLk9KStIjjzxS6f5cmg0AgHnq1JwZH89aAADAy4y6NBsAAOBahBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMAAAwGmEGAAAYjTADAACMRpgBAABGI8wAAACjEWYAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEYjzAAAAKMRZgAAgNEIMwAAwGiEGQAAYDTCDAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxkRZhYsWKB27dopKChIvXr10rZt27xdEgAA8BE+H2b+9re/adKkSXr22WeVnp6u2NhYDRo0SNnZ2d4uDQAA+ACfDzOvv/66xo0bpzFjxqhTp05atGiRgoOD9d5773m7NAAA4AP8vV1ARS5fvqydO3dq+vTpjmUNGjTQgAEDlJaWVuY+hYWFKiwsdLzPzc2VJOXl5dVssQAAwGOufm9bllXptj4dZs6ePavi4mJFREQ4LY+IiNAPP/xQ5j6JiYmaM2dOqeVRUVE1UiMAAKg558+fl91ur3Abnw4z7pg+fbomTZrkeF9SUqKcnBw1b95cNpvNoz8rLy9PUVFRysrKUmhoqEeP7evqc+9S/e6/Pvcu1e/+63PvUv3u3xu9W5al8+fPKzIystJtfTrMtGjRQn5+fjp9+rTT8tOnT6tVq1Zl7hMYGKjAwECnZWFhYTVVoiQpNDS03v2PfVV97l2q3/3X596l+t1/fe5dqt/913bvlY3IXOXTE4ADAgJ02223ae3atY5lJSUlWrt2reLi4rxYGQAA8BU+PTIjSZMmTdLDDz+s22+/XT179tTcuXNVUFCgMWPGeLs0AADgA3w+zIwcOVJnzpzRrFmzdOrUKXXr1k2rV68uNSnYGwIDA/Xss8+WOq1VH9Tn3qX63X997l2q3/3X596l+t2/r/dus1y55gkAAMBH+fScGQAAgMoQZgAAgNEIMwAAwGiEGQAAYDTCzDU2bNigu+++W5GRkbLZbFq5cqVjXVFRkaZNm6auXbuqcePGioyM1EMPPaQTJ044HSMnJ0ejR49WaGiowsLCNHbsWOXn59dyJ1VXUe/Xeuyxx2Sz2TR37lyn5ab2LrnW/969e3XPPffIbrercePG6tGjhzIzMx3rL126pPj4eDVv3lxNmjTRfffdV+qmj76ost7z8/M1fvx4tWnTRo0aNXI89PWXTO09MTFRPXr0UEhIiMLDw3Xvvfdq3759Ttu40ltmZqaGDh2q4OBghYeHa+rUqbpy5UpttuKWyvrPycnRk08+qZtvvlmNGjVSdHS0EhISHM+9u6qu9v9LlmVpyJAhZf4bMbF/V3tPS0vTXXfdpcaNGys0NFT9+vXTxYsXHet94XOfMHONgoICxcbGasGCBaXWXbhwQenp6Zo5c6bS09O1fPly7du3T/fcc4/TdqNHj9Z3332nNWvW6LPPPtOGDRv06KOP1lYLbquo919asWKFtmzZUuYtpk3tXaq8/0OHDqlv377q0KGD1q9fr2+++UYzZ85UUFCQY5uJEyfqH//4hz7++GOlpqbqxIkTGjFiRG214LbKep80aZJWr16tDz74QHv37tWECRM0fvx4rVq1yrGNqb2npqYqPj5eW7Zs0Zo1a1RUVKSBAweqoKDAsU1lvRUXF2vo0KG6fPmyNm/erKVLl2rJkiWaNWuWN1qqksr6P3HihE6cOKFXX31V3377rZYsWaLVq1dr7NixjmPU5f5/ae7cuWU+FsfU/l3pPS0tTYMHD9bAgQO1bds2bd+
+XePHj1eDBv8XH3zic99CuSRZK1asqHCbbdu2WZKsY8eOWZZlWd9//70lydq+fbtjmy+//NKy2WzWjz/+WJPlelR5vR8/fty67rrrrG+//dZq27at9cYbbzjW1ZXeLavs/keOHGk98MAD5e5z7tw5q2HDhtbHH3/sWLZ3715LkpWWllZTpXpcWb137tzZeu6555yW3XrrrdYzzzxjWVbd6d2yLCs7O9uSZKWmplqW5VpvX3zxhdWgQQPr1KlTjm0WLlxohYaGWoWFhbXbQDVd239ZPvroIysgIMAqKiqyLKt+9J+RkWFdd9111smTJ0v9G6kr/ZfVe69evawZM2aUu4+vfO4zMlNNubm5stlsjuc/paWlKSwsTLfffrtjmwEDBqhBgwbaunWrl6r0jJKSEj344IOaOnWqOnfuXGp9Xe/9888/10033aRBgwYpPDxcvXr1chpq3rlzp4qKijRgwADHsg4dOig6OlppaWleqNpzevfurVWrVunHH3+UZVlKSUnR/v37NXDgQEl1q/erp0+aNWsmybXe0tLS1LVrV6ebeQ4aNEh5eXn67rvvarH66ru2//K2CQ0Nlb//z/ddrev9X7hwQb///e+1YMGCMp8LWFf6v7b37Oxsbd26VeHh4erdu7ciIiLUv39/bdy40bGPr3zuE2aq4dKlS5o2bZpGjRrlePDWqVOnFB4e7rSdv7+/mjVrplOnTnmjTI95+eWX5e/vr4SEhDLX1+Xes7OzlZ+fr5deekmDBw/WP//5Tw0fPlwjRoxQamqqpJ/7DwgIKPVg04iICOP7nz9/vjp16qQ2bdooICBAgwcP1oIFC9SvXz9Jdaf3kpISTZgwQX369FGXLl0kudbbqVOnSt2V/Op70/u/1tmzZ/X88887nUao6/1PnDhRvXv31rBhw8rcry70X1bvhw8fliTNnj1b48aN0+rVq3XrrbfqV7/6lQ4cOCDJdz73ff5xBr6qqKhI999/vyzL0sKFC71dTo3buXOn5s2bp/T09DLPGdd1JSUlkqRhw4Zp4sSJkqRu3bpp8+bNWrRokfr37+/N8mrc/PnztWXLFq1atUpt27bVhg0bFB8fr8jISKcRC9PFx8fr22+/dfrLsz6prP+8vDwNHTpUnTp10uzZs2u3uFpQVv+rVq3SunXrlJGR4cXKal5ZvV/93PvDH/7geB5i9+7dtXbtWr333ntKTEz0Sq1lYWTGDVeDzLFjx7RmzRqnx6G3atVK2dnZTttfuXJFOTk5ZQ5PmuLrr79Wdna2oqOj5e/vL39/fx07dkyTJ09Wu3btJNXd3iWpRYsW8vf3V6dOnZyWd+zY0XE1U6tWrXT58mWdO3fOaZvTp08b3f/Fixf1pz/9Sa+//rruvvtu3XLLLRo/frxGjhypV199VVLd6H38+PH67LPPlJKSojZt2jiWu9Jbq1atSl3ddPW96f1fdf78eQ0ePFghISFasWKFGjZs6FhXl/tft26dDh06pLCwMMdnnyTdd999uuOOOySZ3395vbdu3VqSKv3c84XPfcJMFV0NMgcOHNC//vUvNW/e3Gl9XFyczp07p507dzqWrVu3TiUlJerVq1dtl+sxDz74oL755hvt2rXL8YqMjNTUqVP11VdfSaq7vUtSQECAevToUeqyxf3796tt27aSpNtuu00NGzbU2rVrHev37dunzMxMxcXF1Wq9nlRUVKSioiKnqxckyc/Pz/GXm8m9W5al8ePHa8WKFVq3bp1iYmKc1rvSW1xcnPbs2eP0oX71D51rvwh8TWX9Sz+PyAwcOFABAQFatWqV0xV8Ut3u/+mnny712SdJb7zxhpKSkiSZ239lvbdr106RkZEVfu75zOd+rU01NsT58+etjIwMKyMjw5Jkvf7661ZGRoZ17Ngx6/Lly9Y999xjtWnTxtq1a5d18uRJx+uXM9YHDx5sde/e3dq6dau1ceNGq3379taoUaO82JVrKuq9LNdezWRZ5vZuWZX3v3z5cqthw4bWO++8Yx04cMCaP3++5efnZ3399deOYzz22GNWdHS0tW7dOmvHjh1WXFycFRcX562WXFZZ7/3797c6d+5spaSkWIcPH7aSkpKsoKAg6+2333Ycw9TeH3/8cctut1vr1693+jd94cIFxzaV9XblyhWrS5cu1sCBA61du3ZZq1evtlq2bGlNnz7dGy1VSWX95+bmWr169bK6du1qHTx40GmbK1euWJZVt/svi665msnU/l3p/Y033rBCQ0Otjz/+2Dpw4IA1Y8YMKygoyDp48KBjG1/43CfMXCMlJcWSVOr18MMPW0eOHClznSQrJSXFcYyffvrJGjVqlNWkSRMrNDTUGjNmjHX+/HnvNeWiinovS1lhxtTeLcu1/hcvXmzdeOONVlBQkBUbG2utXLnS6RgXL160nnjiCatp06ZWcHCwNXz4cOvkyZO13EnVVdb7yZMnrUceecSKjIy0goKCrJtvvtl67bXXrJKSEscxTO29vH/TSUlJjm1c6e3o0aPWkCFDrEaNGlktWrSwJk+e7Lh02ZdV1n95/29Iso4cOeI4Tl3tv7x9rr19gYn9u9p7YmKi1aZNGys4ONiKi4tz+gPOsnzjc99mWZbliREeAAAAb2DODAAAMBphBgAAGI0wAwAAjEaYAQAARiPMAAAAoxFmAACA0QgzAADAaIQZAABgNMIMgBp3xx13aMKECd4uA0AdRZgBUCUEk9Jmz56tbt26ebsMoN4izAAAAKMRZgC47JFHHlFqaqrmzZsnm80mm82mo0ePKjU1VT179lRgYKBat26tp59+WleuXCn3OJ9//rnsdrs+/PBDSVJWVpbuv/9+hYWFqVmzZho2bJiOHj3q9HPvvfdevfrqq2rdurWaN2+u+Ph4FRUVObZ5++231b59ewUFBSkiIkL/9V//5VJPhYWFSkhIUHh4uIKCgtS3b19t377dsX7JkiUKCwtz2mflypWy2WyO9XPmzNHu3bsdv5MlS5a49LMBeAZhBoDL5s2bp7i4OI0bN04nT57UyZMn1bBhQ/3mN79Rjx49tHv3bi1cuFCLFy/WCy+8UOYxli1bplGjRunDDz/U6NGjVVRUpEGDBikkJERff/21Nm3apCZNmmjw4MG6fPmyY7+UlBQdOnRIKSkpWrp0qZYsWeIIDTt27FBCQoKee+457du3T6tXr1a/fv1c6umpp57S3//+dy1dulTp6em68cYbNWjQIOXk5Li0/8iRIzV58mR17tzZ8TsZOXKkS/sC8Ax/bxcAwBx2u10BAQEKDg5Wq1atJEnPPPOMoqKi9NZbb8lms6lDhw46ceKEpk2bplmzZqlBg//7m2nBggV65pln9I9//EP9+/eXJP3tb39TSUmJ/ud//scx2pGUlKSwsDCtX79eAwcOlCQ1bdpUb731lvz8/NShQwcNHTpUa9eu1bhx45SZmanGjRvrP//zPxUSEqK2bduqe/fulfZTUFCghQsXasmSJRoyZIgk6d1339WaNWu0ePFiTZ06tdJjNGrUSE2aNJG/v7/jdwKgdhFmAFTL3r17FRcX5wgikt
SnTx/l5+fr+PHjio6OliR98sknys7O1qZNm9SjRw/Htrt379bBgwcVEhLidNxLly7p0KFDjvedO3eWn5+f433r1q21Z88eSdKvf/1rtW3bVtdff70GDx6swYMHa/jw4QoODq6w9kOHDqmoqEh9+vRxLGvYsKF69uypvXv3uvHbAOANnGYCUCu6d++uli1b6r333pNlWY7l+fn5uu2227Rr1y6n1/79+/X73//esV3Dhg2djmez2VRSUiJJCgkJUXp6upKTk9W6dWvNmjVLsbGxOnfuXLXrbtCggVO9kpzm6gDwPsIMgCoJCAhQcXGx433Hjh2Vlpbm9IW/adMmhYSEqE2bNo5lN9xwg1JSUvTpp5/qySefdCy/9dZbdeDAAYWHh+vGG290etntdpfr8vf314ABA/TKK6/om2++0dGjR7Vu3boK97nhhhsUEBCgTZs2OZYVFRVp+/bt6tSpkySpZcuWOn/+vAoKChzb7Nq1q8LfCYDaRZgBUCXt2rXT1q1bdfToUZ09e1ZPPPGEsrKy9OSTT+qHH37Qp59+qmeffVaTJk1ymi8jSTfddJNSUlL097//3XGvmtGjR6tFixYaNmyYvv76ax05ckTr169XQkKCjh8/7lJNn332md58803t2rVLx44d0/vvv6+SkhLdfPPNFe7XuHFjPf7445o6dapWr16t77//XuPGjdOFCxc0duxYSVKvXr0UHBysP/3pTzp06JCWLVtW6mqldu3a6ciRI9q1a5fOnj2rwsJC136ZADyCMAOgSqZMmSI/Pz916tRJLVu2VFFRkb744gtt27ZNsbGxeuyxxzR27FjNmDGjzP1vvvlmrVu3TsnJyZo8ebKCg4O1YcMGRUdHa8SIEerYsaPGjh2rS5cuKTQ01KWawsLCtHz5ct11113q2LGjFi1apOTkZHXu3LnSfV966SXdd999evDBB3Xrrbfq4MGD+uqrr9S0aVNJUrNmzfTBBx/oiy++UNeuXZWcnKzZs2c7HeO+++7T4MGDdeedd6ply5ZKTk52qW4AnmGzrj0ZDAAAYBBGZgAAgNEIMwDqrMzMTDVp0qTcV2ZmprdLBOABnGYCUGdduXLF6bEI12rXrp38/bndFmA6wgwAADAap5kAAIDRCDMAAMBohBkAAGA0wgwAADAaYQYAABiNMAMAAIxGmAEAAEb7//cveKIa+CqcAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cdf.plot.scatter(y='total_time', x='tokens_out', ylim=[0,10])" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "6f520389-aea3-4f0f-a23a-9094b57251c8", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[stripped HTML table removed: duplicate of the cdf.describe() summary reproduced in text/plain below]
" - ], - "text/plain": [ - " ttft total_time tokens_in tokens_out inter_tokens_delay \\\n", - "count 181.000000 181.000000 181.000000 181.000000 181.000000 \n", - "mean 2.209350 10.661870 511.988950 137.944751 0.077509 \n", - "std 0.910844 0.810370 136.905198 14.281971 0.003850 \n", - "min 0.246783 8.863190 258.000000 120.000000 0.064592 \n", - "25% 1.646696 10.251724 404.000000 131.000000 0.075630 \n", - "50% 2.675371 10.556036 517.000000 136.000000 0.077166 \n", - "75% 2.998175 10.958826 630.000000 140.000000 0.079620 \n", - "max 3.198518 16.923179 738.000000 262.000000 0.087857 \n", - "\n", - " total_tokens_per_s out_tokens_per_s \n", - "count 181.000000 181.000000 \n", - "mean 61.123584 12.934065 \n", - "std 12.982601 0.658547 \n", - "min 33.386163 11.382172 \n", - "25% 50.126811 12.559732 \n", - "50% 63.105083 12.959061 \n", - "75% 71.557239 13.222181 \n", - "max 84.330612 15.481725 " - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cdf.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "81c9cdee-028f-448b-9179-04aa758e4f37", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAGdCAYAAAAIbpn/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAgzElEQVR4nO3df3AU9f3H8ddByAU0CQbIryGSyC8VBCoKRVBBkPBjGH7NVPFXQOqvBgtGpNBa+UacBrEi2kZwppJILaJUwFYLCAFCEdDhl4B1IkR+ShIoakJiOWJuv3843niQn8fldj/J8zGzM729vcs72+N8zt7m1mVZliUAAAADtbB7AAAAgEARMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWIQMAAAwFiEDAACMFWb3AI3N6/Xq1KlTioyMlMvlsnscAABQD5Zl6dy5c0pMTFSLFjUfd2nyIXPq1CklJSXZPQYAAAjAiRMn1LFjxxrvb/IhExkZKemHHREVFWXzNAAAoD7KysqUlJTk++94TZp8yPz4cVJUVBQhAwCAYeo6LYSTfQEAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYKwwuwcAAADOlDz7gzq3OTp/dAgmqRlHZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxbA2ZrKws3XzzzYqMjFRsbKzGjRungoICv20GDx4sl8vltzz66KM2TQwAAJzE1pDJz89Xenq6du7cqQ0bNqiyslLDhw9XRUWF33YPPfSQioqKfMuCBQtsmhgAADiJrddaWrdund/t3NxcxcbGavfu3brtttt869u0aaP4+PhQjwcAABzOUefIlJaWSpJiYmL81v/tb39T+/bt1bNnT82ZM0ffffddjc/h8XhUVlbmtwAAgKbJMVe/9nq9mjFjhgYOHKiePXv61t9zzz3q1KmTEhMTtX//fv3mN79RQUGBVq1aVe3zZGVlKTMzM1RjAwAAG7ksy7LsHkKSHnvsMa1du1bbtm1Tx44da9xu06ZNGjp0qA4fPqzOnTtfcr/H45HH4/HdLisrU1JSkkpLSxUVFdUoswMA0BQlz/6gzm2Ozh/dKD+7rKxM0dHRdf732xFHZKZNm6b3339fW7durTViJKl///6SVGPIuN1uud3uRpkTAAA4i60hY1mWHn/8ca1evVpbtmxRSkpKnY/Zt2+fJCkhIaGRpwMAAE5na8ikp6dr+fLleu+99xQZGani4mJJUnR0tFq3bq3CwkItX75co0aNUrt27bR//3498cQTuu2229SrVy87RwcAAA5ga8gsXrxY0g9fevdTOTk5mjx5ssLDw7Vx40YtWrRIFRUVSkpK0sSJE/X000/bMC0AAHAa2z9aqk1SUpLy8/NDNA0AADCNo75HBgAAoCEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGsjVksrKydPPNNysyMlKxsbEaN26cCgoK/LY5f/680tPT1a5dO1155ZWaOHGiSkpKbJoYAAA4ia0hk5+fr/T0dO3cuVMbNmxQZWWlhg8froqKCt82TzzxhP75z39q5cqVys/P16lTpzRhwgQbpwYAAE4RZucPX7dund/t3NxcxcbGavfu3brttttUWlqq119/XcuXL9cdd9whScrJydF1112nnTt36uc//7kdYwMAAIdw1DkypaWlkqSYmBhJ0u7du1VZWalhw4b5trn22mt19dVX
a8eOHbbMCAAAnMPWIzI/5fV6NWPGDA0cOFA9e/aUJBUXFys8PFxt27b12zYuLk7FxcXVPo/H45HH4/HdLisra7SZAQCAvRwTMunp6Tp48KC2bdt2Wc+TlZWlzMzMIE0FAGjKkmd/UOc2R+ePDsEkCJQjPlqaNm2a3n//fW3evFkdO3b0rY+Pj9eFCxf07bff+m1fUlKi+Pj4ap9rzpw5Ki0t9S0nTpxozNEBAICNbA0Zy7I0bdo0rV69Wps2bVJKSorf/X379lWrVq2Ul5fnW1dQUKDjx49rwIAB1T6n2+1WVFSU3wIAAJomWz9aSk9P1/Lly/Xee+8pMjLSd95LdHS0WrdurejoaE2dOlUZGRmKiYlRVFSUHn/8cQ0YMIC/WAIAAPaGzOLFiyVJgwcP9lufk5OjyZMnS5JeeukltWjRQhMnTpTH41FqaqpeffXVEE8KAACcyNaQsSyrzm0iIiKUnZ2t7OzsEEwEAABM4oiTfQEAAAJByAAAAGMRMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWI65aCQAAE1ZsC5QyYUu/XFEBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQIKmS+//DLYcwAAADRYQCHTpUsXDRkyRG+++abOnz8f7JkAAADqJaCQ2bNnj3r16qWMjAzFx8frkUce0SeffBLs2QAAAGoVUMj06dNHL7/8sk6dOqWlS5eqqKhIgwYNUs+ePbVw4UKdOXMm2HMCAABc4rJO9g0LC9OECRO0cuVKPf/88zp8+LBmzpyppKQkPfDAAyoqKgrWnAAAAJe4rJDZtWuXfvWrXykhIUELFy7UzJkzVVhYqA0bNujUqVMaO3ZssOYEAAC4RFggD1q4cKFycnJUUFCgUaNGadmyZRo1apRatPihi1JSUpSbm6vk5ORgzgoAAOAnoJBZvHixHnzwQU2ePFkJCQnVbhMbG6vXX3/9soYDAACoTUAhc+jQoTq3CQ8PV1paWiBPDwAAUC8BnSOTk5OjlStXXrJ+5cqVeuONNy57KAAAgPoIKGSysrLUvn37S9bHxsbqD3/4Q72fZ+vWrRozZowSExPlcrm0Zs0av/snT54sl8vlt4wYMSKQkQEAQBMUUMgcP35cKSkpl6zv1KmTjh8/Xu/nqaioUO/evZWdnV3jNiNGjFBRUZFveeuttwIZGQAANEEBnSMTGxur/fv3X/JXSZ9++qnatWtX7+cZOXKkRo4cWes2brdb8fHxgYwJAACauICOyEyaNEm//vWvtXnzZlVVVamqqkqbNm3S9OnTdffddwd1wC1btig2Nlbdu3fXY489prNnz9a6vcfjUVlZmd8CAACapoCOyMybN09Hjx7V0KFDFRb2w1N4vV498MADDTpHpi4jRozQhAkTlJKSosLCQv32t7/VyJEjtWPHDrVs2bLax2RlZSkzMzNoMwAAAOcKKGTCw8P19ttva968efr000/VunVr3XDDDerUqVNQh/vp0Z0bbrhBvXr1UufOnbVlyxYNHTq02sfMmTNHGRkZvttlZWVKSkoK6lwAAMAZAgqZH3Xr1k3dunUL1ix1uuaaa9S+fXsdPny4xpBxu91yu90hmwkAANgnoJCpqqpSbm6u8vLydPr0aXm9Xr/7N23aFJThLnby5EmdPXu2xm8TBgAAzUtAITN9+nTl5uZq9OjR6tmzp1wuV0A/vLy8XIcPH/bdPnLkiPbt26eYmBjFxMQoMzNTEydOVHx8vAoLCzVr1ix16dJFqampAf08AADQtAQUMitWrNA777yjUaNGXdYP37Vrl4YMGeK7/eO5LWlpaVq8eLH279+vN954Q99++60SExM1fPhwzZs3j4+OAACApMs42bdLly6X/cMHDx4sy7JqvH/9+vWX/TMAAEDTFdD3yDz55JN6+eWXa40QAACAxhbQEZlt27Zp8+bNWrt2rXr06KFWrVr53b9q1aqgDAcAAFCbgEKmbdu2Gj9+fLBnAQAAaJCAQiYnJyfYcwAAADRYQOfISNL333+vjRs36rXXXtO5c+ckSadOnVJ5eXnQhgMAAKhNQEdkjh07phEjRuj48ePyeDy68847FRkZqeeff14ej0dLliwJ9pwAAACXCOiIzPTp03XTTTfpm2++UevWrX3rx48fr7y8vKANBwAAUJuAjsj8+9//1vbt2xUeHu63Pjk5WV999VVQBgMAEyXP/qDObY7OHx2CSYDmIaAjMl6vV1VVVZesP3nypCIjIy97KAAAgPoIKGSGDx+uRYsW+W67XC6Vl5dr7ty5l33ZAgAAgPoK6KOlF198Uampqbr++ut1/vx53XPPPTp06JDat2+vt956K9gzAgAAVCugkOnYsaM+/fRTrVixQvv371d5ebmmTp2qe++91+/kXwAAgMYUUMhIUlhYmO67775gzgIAANAgAYXMsmXLar3/gQceCGgYAACAhggoZKZPn+53u7KyUt99953Cw8PVpk0bQgYAAIREQH+19M033/gt5eXlKigo0KBBgzjZFwAAhEzA11q6WNeuXTV//vxLjtYAAAA0lqCFjPTDCcCnTp0K5lMCAADUKKBzZP7xj3/43bYsS0VFRfrzn/+sgQMHBmUwAACAugQUMuPGjfO77XK51KFDB91xxx168cUXgzEXAABAnQIKGa/XG+w5AAAAGiyo58gAAACEUkBHZDIyMuq97cKFCwP5EQAAAHUKKGT27t2rvXv3qrKyUt27d5ckffHFF2rZsqVuvPFG33Yulys4UwIAAFQjoJAZM2aMIiMj9cYbb+iqq66S9MOX5E2ZMkW33nqrnnzyyaAOCQAAUJ2AzpF58cUXlZWV5YsYSbrqqqv03HPP8VdLAAAgZAIKmbKyMp05c+aS9WfOnNG5c+cueygAAID6CChkxo8frylTpmjVqlU6efKkTp48qXfffVdTp07VhAkTgj0jAABAtQI6R2bJkiWaOXOm7rnnHlVWVv7wRGFhmjp1ql544YWgDggAAFCTgEKmTZs2evXVV/XCCy+osLBQktS5c2ddccUVQR0OAACgNpf1hXhFRUUqKipS165ddcUVV8iyrGDNBQAAUKeAQubs2bMaOnSounXrplGjRqmoqEiSNHXqVP70GgAAhExAIfPEE0+oVatWOn78uNq0aeNbf9ddd2ndunVBGw4AAKA2AZ0j8+GHH2r9+vXq2LGj3/quXbvq2LFjQRkMAACgLgEdkamoqPA7EvOjr7/+Wm63+7KHAgAAqI+AQubWW2/VsmXLfLddLpe8Xq8WLFigIUOGBG04AACA2gT00dK
CBQs0dOhQ7dq1SxcuXNCsWbP02Wef6euvv9ZHH30U7BkBAACqFdARmZ49e+qLL77QoEGDNHbsWFVUVGjChAnau3evOnfuHOwZAQAAqtXgIzKVlZUaMWKElixZot/97neNMRMAAEC9NPiITKtWrbR///7GmAUAAKBBAvpo6b777tPrr78e7FkAAAAaJKCTfb///nstXbpUGzduVN++fS+5xtLChQuDMhwAAEBtGhQyX375pZKTk3Xw4EHdeOONkqQvvvjCbxuXyxW86QAAAGrRoJDp2rWrioqKtHnzZkk/XJLglVdeUVxcXKMMBwAAUJsGnSNz8dWt165dq4qKiqAOBAAAUF8Bnez7o4vDBgAAIJQaFDIul+uSc2A4JwYAANilQefIWJalyZMn+y4Mef78eT366KOX/NXSqlWrgjchAABADRoUMmlpaX6377vvvqAOAwAA0BANCpmcnJzGmgMAAKDBLutkXwAAADsRMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWIQMAAAwlq0hs3XrVo0ZM0aJiYlyuVxas2aN3/2WZemZZ55RQkKCWrdurWHDhunQoUP2DAsAABzH1pCpqKhQ7969lZ2dXe39CxYs0CuvvKIlS5bo448/1hVXXKHU1FSdP38+xJMCAAAnatA3+wbbyJEjNXLkyGrvsyxLixYt0tNPP62xY8dKkpYtW6a4uDitWbNGd999dyhHBQAADuTYc2SOHDmi4uJiDRs2zLcuOjpa/fv3144dO2p8nMfjUVlZmd8CAACaJluPyNSmuLhYkhQXF+e3Pi4uzndfdbKyspSZmdmoswG4fMmzP6hzm6PzR4dgEgAmc+wRmUDNmTNHpaWlvuXEiRN2jwQAABqJY0MmPj5eklRSUuK3vqSkxHdfddxut6KiovwWAADQNDk2ZFJSUhQfH6+8vDzfurKyMn388ccaMGCAjZMBAACnsPUcmfLych0+fNh3+8iRI9q3b59iYmJ09dVXa8aMGXruuefUtWtXpaSk6Pe//70SExM1btw4+4YGAACOYWvI7Nq1S0OGDPHdzsjIkCSlpaUpNzdXs2bNUkVFhR5++GF9++23GjRokNatW6eIiAi7RgYAAA5ia8gMHjxYlmXVeL/L5dKzzz6rZ599NoRTAQAAUzj2HBkAAIC6EDIAAMBYhAwAADAWIQMAAIxFyAAAAGMRMgAAwFiOvWgkmob6XBiwPrh4IICGCtb7D5yNIzIAAMBYhAwAADAWIQMAAIxFyAAAAGMRMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWIQMAAAwFiEDAACMRcgAAABjETIAAMBYhAwAADAWIQMAAIxFyAAAAGMRMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWIQMAAAwFiEDAACMRcgAAABjETIAAMBYhAwAADAWIQMAAIxFyAAAAGMRMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWIQMAAAwFiEDAACMRcgAAABjETIAAMBYhAwAADAWIQMAAIxFyAAAAGOF2T0AAJgiefYHdo8A4CIckQEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYy9Eh83//939yuVx+y7XXXmv3WAAAwCEc/82+PXr00MaNG323w8IcPzIAAAgRx1dBWFiY4uPj7R4DAAA4kKM/WpKkQ4cOKTExUddcc43uvfdeHT9+3O6RAACAQzj6iEz//v2Vm5ur7t27q6ioSJmZmbr11lt18OBBRUZGVvsYj8cjj8fju11WVhaqcQEAQIg5OmRGjhzp+9+9evVS//791alTJ73zzjuaOnVqtY/JyspSZmZmqEYEACBouMJ6wzn+o6Wfatu2rbp166bDhw/XuM2cOXNUWlrqW06cOBHCCQEAQCgZFTLl5eUqLCxUQkJCjdu43W5FRUX5LQAAoGlydMjMnDlT+fn5Onr0qLZv367x48erZcuWmjRpkt2jAQAAB3D0OTInT57UpEmTdPbsWXXo0EGDBg3Szp071aFDB7tHAwAADuDokFmxYoXdIwAAAAdz9EdLAAAAtSFkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsR3+PDGCH+ly07ej80SGYBPx/AaAuHJEBAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADG4urXqJaJVx122sxOmwcAmiKOyAAAAGMRMgAAwFiEDAAAMBYhAwAAjEXIAAAAYxEyAADAWIQMAAAwFiEDAACMRcgAAABjETIAAMBYhAwAADAWIQMAAIzFRSMbWVO+cGB9fjen/SynPU9Txf4B7NWc/g1yRAYAABiLkAEAAMYiZAAAgLEIGQAAYCxCBgAAGIuQAQAAxiJkAACAsQgZAABgLEIGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLq19fhlBfXbQpX0kbAIBAcEQGAAAYi5ABAADGImQAAICxCBkAAGAsQgYAABiLkAEAAMYiZAAAgLEIGQAAYCwjQiY7O1vJycmKiIhQ//799cknn9g9EgAAcADHh8zbb7+tjIwMzZ07V3v27FHv3r2Vmpqq06dP2z0aAACwmeNDZuHChXrooYc0ZcoUXX/99VqyZInatGmjpUuX2j0aAACwmaOvtXThwgXt3r1bc+bM8a1r0aKFhg0bph07dlT7GI/HI4/H47tdWloqSSorKwv6fF7Pd0F5nvrOVp+fF6zfM1i/G2rXGK9LJwjl6yeU+zDU/+ZxeZz2/1dTfV9trNfzj89rWVbtG1oO9tVXX1mSrO3bt/utf+qpp6x+/fpV+5i5c+daklhYWFhYWFiawHLixIlaW8HRR2QCMWfOHGVkZPhue71eff3112rXrp1cLpeNk4VOWVmZkpKSdOLECUVFRdk9jhHYZw3D/moY9lfDsc8apinuL8uydO7cOSUmJta6naNDpn379mrZsqVKSkr81peUlCg+Pr7ax7jdbrndbr91bdu2bawRHS0qKqrJvKBDhX3WMOyvhmF/NRz7rGGa2v6Kjo6ucxtHn+wbHh6uvn37Ki8vz7fO6/UqLy9PAwYMsHEyAADgBI4+IiNJGRkZSktL00033aR+/fpp0aJFqqio0JQpU+weDQAA2MzxIXPXXXfpzJkzeuaZZ1RcXKw+ffpo3bp1iouLs3s0x3K73Zo7d+4lH7GhZuyzhmF/NQz7q+HYZw3TnPeXy7Lq+rsmAAAAZ3L0OTIAAAC1IWQAAICxCBkAAGAsQgYAABiLkDFUdna2kpOTFRERof79++uTTz6pcdvc3Fy5XC6/JSIiIoTT2mvr1q0aM2aMEhMT5XK5tGbNmjofs2XLFt14441yu93q0qWLcnNzG31Op2jo/tqyZcslry+Xy6Xi4uLQDGyzrKws3XzzzYqMjFRsbKzGjRungoKCOh+3cuVKXXvttYqIiNANN9ygf/3rXyGY1hkC2WfN+X1s8eLF6tWrl+
/L7gYMGKC1a9fW+pjm9PoiZAz09ttvKyMjQ3PnztWePXvUu3dvpaam6vTp0zU+JioqSkVFRb7l2LFjIZzYXhUVFerdu7eys7Prtf2RI0c0evRoDRkyRPv27dOMGTP0y1/+UuvXr2/kSZ2hofvrRwUFBX6vsdjY2Eaa0Fny8/OVnp6unTt3asOGDaqsrNTw4cNVUVFR42O2b9+uSZMmaerUqdq7d6/GjRuncePG6eDBgyGc3D6B7DOp+b6PdezYUfPnz9fu3bu1a9cu3XHHHRo7dqw+++yzardvdq+v4FzeEaHUr18/Kz093Xe7qqrKSkxMtLKysqrdPicnx4qOjg7RdM4myVq9enWt28yaNcvq0aOH37q77rrLSk1NbcTJnKk++2vz5s2WJOubb74JyUxOd/r0aUuSlZ+fX+M2v/jFL6zRo0f7revfv7/1yCOPNPZ4jlSffcb7mL+rrrrK+stf/lLtfc3t9cURGcNcuHBBu3fv1rBhw3zrWrRooWHDhmnHjh01Pq68vFydOnVSUlJSrSUPaceOHX77V5JSU1Nr3b+Q+vTpo4SEBN1555366KOP7B7HNqWlpZKkmJiYGrfhNeavPvtM4n1MkqqqqrRixQpVVFTUeKme5vb6ImQM89///ldVVVWXfLNxXFxcjeckdO/eXUuXLtV7772nN998U16vV7fccotOnjwZipGNU1xcXO3+LSsr0//+9z+bpnKuhIQELVmyRO+++67effddJSUlafDgwdqzZ4/do4Wc1+vVjBkzNHDgQPXs2bPG7Wp6jTWX84p+qr77rLm/jx04cEBXXnml3G63Hn30Ua1evVrXX399tds2t9eX4y9RgMs3YMAAv3K/5ZZbdN111+m1117TvHnzbJwMTUH37t3VvXt33+1bbrlFhYWFeumll/TXv/7VxslCLz09XQcPHtS2bdvsHsUY9d1nzf19rHv37tq3b59KS0v197//XWlpacrPz68xZpoTjsgYpn379mrZsqVKSkr81peUlCg+Pr5ez9GqVSv97Gc/0+HDhxtjROPFx8dXu3+joqLUunVrm6YyS79+/Zrd62vatGl6//33tXnzZnXs2LHWbWt6jdX333BT0ZB9drHm9j4WHh6uLl26qG/fvsrKylLv3r318ssvV7ttc3t9ETKGCQ8PV9++fZWXl+db5/V6lZeXV+PnpRerqqrSgQMHlJCQ0FhjGm3AgAF++1eSNmzYUO/9C2nfvn3N5vVlWZamTZum1atXa9OmTUpJSanzMc39NRbIPrtYc38f83q98ng81d7X7F5fdp9tjIZbsWKF5Xa7rdzcXOs///mP9fDDD1tt27a1iouLLcuyrPvvv9+aPXu2b/vMzExr/fr1VmFhobV7927r7rvvtiIiIqzPPvvMrl8hpM6dO2ft3bvX2rt3ryXJWrhwobV3717r2LFjlmVZ1uzZs63777/ft/2XX35ptWnTxnrqqaeszz//3MrOzrZatmxprVu3zq5fIaQaur9eeukla82aNdahQ4esAwcOWNOnT7datGhhbdy40a5fIaQee+wxKzo62tqyZYtVVFTkW7777jvfNhf/m/zoo4+ssLAw649//KP1+eefW3PnzrVatWplHThwwI5fIeQC2WfN+X1s9uzZVn5+vnXkyBFr//791uzZsy2Xy2V9+OGHlmXx+iJkDPWnP/3Juvrqq63w8HCrX79+1s6dO3333X777VZaWprv9owZM3zbxsXFWaNGjbL27Nljw9T2+PHPgy9eftxHaWlp1u23337JY/r06WOFh4db11xzjZWTkxPyue3S0P31/PPPW507d7YiIiKsmJgYa/DgwdamTZvsGd4G1e0rSX6vmYv/TVqWZb3zzjtWt27drPDwcKtHjx7WBx98ENrBbRTIPmvO72MPPvig1alTJys8PNzq0KGDNXToUF/EWBavL5dlWVbojv8AAAAED+fIAAAAYxEyAADAWIQMAAAwFiEDAACMRcgAAABjETIAAMBYhAwAADAWIQMAAIxFyAAAAGMRMgAAwFiEDAAAMBYhAwAAjPX/ftOVRomlLQ0AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cdf['ttft'].plot.hist(bins=50)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "cd59c882", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '',\n", - " '']" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(df[df['valid'] == 'OK']['cause'].to_numpy())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "51469ab5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91d5b583-b052-4e07-a063-47c8e10ede52", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/analyze-token-benchmark-results.ipynb b/analyze-token-benchmark-results.ipynb new file mode 100644 index 0000000..d6c5a45 --- /dev/null +++ b/analyze-token-benchmark-results.ipynb @@ -0,0 +1,327 @@ +{ 
+ "cells": [ + { + "cell_type": "markdown", + "id": "56950450", + "metadata": {}, + "source": [ + "# Token Benchmark Example Analysis\n", + "The following is an example of the analysis that can be done on individual responses that are saved when running `token_benchmark_ray.py` with the flag `--results-dir` which enables the saving of all responses." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dacfe98a-e81b-4089-9506-97a652993b5b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "17f7abe9-ed9e-466c-b034-577489aaf98b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
[stripped HTML table removed: duplicate of the DataFrame preview shown in text/plain below]
" + ], + "text/plain": [ + " error_code error_msg inter_token_latency_s \\\n", + "0 NaN [0.5549881670012831, 0.0009654169989510001, 0.... \n", + "1 NaN [0.6019128750049271, 0.007011749999946, 0.0144... \n", + "\n", + " ttft_s end_to_end_latency_s request_output_throughput_token_per_s \\\n", + "0 0.554988 1.610734 44.079272 \n", + "1 0.601913 1.725729 44.039357 \n", + "\n", + " number_total_tokens number_output_tokens number_input_tokens \n", + "0 706 71 635 \n", + "1 730 76 654 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# path to the individual responses json file\n", + "df = pd.read_json('/home/ray/default/llmperf/result_outputs/550_150_individual_responses.json')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "565a59e4", + "metadata": {}, + "outputs": [], + "source": [ + "valid_df = df[(df[\"error_code\"] != \"\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "102894bc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[stripped HTML table removed: duplicate of the valid_df preview shown in text/plain below]
" + ], + "text/plain": [ + " error_code error_msg inter_token_latency_s \\\n", + "0 NaN [0.5549881670012831, 0.0009654169989510001, 0.... \n", + "1 NaN [0.6019128750049271, 0.007011749999946, 0.0144... \n", + "\n", + " ttft_s end_to_end_latency_s request_output_throughput_token_per_s \\\n", + "0 0.554988 1.610734 44.079272 \n", + "1 0.601913 1.725729 44.039357 \n", + "\n", + " number_total_tokens number_output_tokens number_input_tokens \n", + "0 706 71 635 \n", + "1 730 76 654 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "valid_df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c7519fc9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean number of input tokens: 644.5. Mean number of output tokens: 73.5\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+u0lEQVR4nO3deVgW9f7/8dcNsqrgAgIqgop7uYSKYIkVbp1TVp4yWzBOmpWmRllRuWSLmll2mSeXcknLXKqv+tP0JLlUmpZmaZnghkuCogKiBgmf3x9d3MdbFsFYnefjuua6vD8z85nPe4aBlzNz37fNGGMEAABgIU4VPQAAAIDyRgACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACKsCGDRtks9m0bNmyih5KsaSkpOhf//qX6tatK5vNpqlTp1b0kCq1cePGyWazKTU1taKHAqAQBCBcs+bNmyebzSZ3d3cdO3Ys3/zu3bvruuuuq4CRVT1PPfWU1q5dq7i4OC1YsEC9e/cudFmbzaZhw4aV4+iK9vHHHxcrsOWFlitN3bt3L/MxX+uKs59LMm3YsEGHDh0qdH6XLl04vsinWkUPAChrWVlZmjhxoqZNm1bRQ6myvvrqK/Xt21fPPPNMRQ+lxD7++GPt3r1bI0eOLHK5u+++WyEhIfbXmZmZevzxx3XXXXfp7rvvtrf7+fmV1VAtY8GCBQ6vP/zwQ3355Zf52nNycuTs7HzF5Vq1aqULFy5IkgYMGKDbbrvNYb6vr68CAgI4vnBAAMI1r3379po9e7bi4uJUv379ih5OuTp37pyqV6/+t/s5ceKEatWq9fcHVIm1bdtWbdu2tb9OTU3V448/rrZt2+rBBx+swJFdey7fn999952+/PLLK+7nopY7dOiQJOmGG24otB+OLy7FLTBc81544QXl5ORo4sSJRS6Xdwl93rx5+ebZbDaNGzfO/jrvcnpCQoIefPBBeXt7y9fXV6NHj5YxRkeOHFHfvn3l5eUlf39/TZkypcBt5uTk6IUXXpC/v7+qV6+uO+64Q0eOHMm33NatW9W7d295e3vL09NTkZGR+vbbbx2WyRvTr7/+qvvvv1+1a9fWjTfeWGTNBw4c0D333KM6derI09NTXbp00apVq+zz824jGmM0ffp0+22Cksh73mnJkiV67bXX1LBhQ7m7u+vWW2/Vvn37HJbNuy25fft2RUREyMPDQ40bN9aMGTMclssbV94fvcu3tWHDBnt/q1atUlJSkn3swcHBJRr/5b766ivddNNNql69umrVqqW+fftqz549V1wvKSlJISEhuu6665SSkiJJSktL08iRIxUYGCg3NzeFhIRo0qRJys3Nta+X93P55ptvatasWWratKnc3NzUqVMnff/99w7bSE5OVkxMjBo2bCg3NzcFBASob9+++fbTpd58803ZbDYlJSXlmxcXFydXV1edOXNGkpSYmKh+/frJ399f7u7uatiwoe677z6lp6cXZ9cBlQpXgHDNa9y4saKjozV79mw9//zzpXoVqH///mrVqpUmTpyoVatW6dVXX1WdOnU0c+ZM3XLLLZo0aZI++ugjPfPMM+rUqZO6devmsP5rr70mm82m5557TidOnNDUqVMVFRWlnTt3ysPDQ9Jff3D79Omj0NBQjR07Vk5OTpo7d65uueUWff311+rcubNDn/fcc4+aNWum119/XcaYQseekpKiiIgInT9/XsOHD1fdunU1f/583XHHHVq2bJnuuusudevWTQsWLNBDDz2kHj16KDo6+qr31cSJE+Xk5KRnnnlG6enpeuONN/TAAw9o69atDsudOXNGt912m+69914NGDBAS5Ys0eOPPy5XV1f9+9//LtE2X3zxRaWnp+vo0aN6++23JUk1atS46hrWrVunPn36qEmTJho3bpwuXLigadOmqWvXrtqxY0eh4Wr//v265ZZbVKdOHX355Zfy8fHR+fPnFRkZqWPHjmnIkCFq1KiRNm/erLi4OB0/fjzfc0sff/yxzp49qyFDhshms+mNN97Q3XffrQMHDsjFxUWS1K9fP/3yyy968sknFRwcrBMnTujLL7/U4cOHCx3bvffeq2effVZLlizRqFGjHOYtWbJEPXv2VO3atZWdna1evXopKytLTz75pPz9/XXs2DH9v//3/5SWliZvb++r3q+l6fz58/kePvf29rbvI8DOANeouXPnGknm+++/N/v37zfVqlUzw4cPt8+PjIw0bdq0sb8+ePCgkWTmzp2bry9JZuzYsfbXY8eONZLMo48+am+7ePGiadiwobHZbGbixIn29jNnzhgPDw8zcOBAe9v69euNJNOgQQOTkZFhb1+yZImRZN555x1jjDG5ubmmWbNmplevXiY3N9e+3Pnz503jxo1Njx498o1pwIABxdo/I0eONJLM119/bW87e/asady4sQkODjY5OTkO9Q8dOrRY/V6+bF6trVq1MllZWfb2d955x0gyu3btsrdFRkY
aSWbKlCn2tqysLNO+fXtTr149k52dbYz537E9ePCgw7bztrV+/Xp72z/+8Q8TFBRUrLFf6uTJk/mOe944Tp06ZW/76aefjJOTk4mOjra35R2LkydPmj179pj69eubTp06mdOnT9uXeeWVV0z16tVNQkKCw3aff/554+zsbA4fPmyM+d/PZd26dR3WX758uZFkVq5caYz56+dMkpk8eXKJaw0PDzehoaEObdu2bTOSzIcffmiMMebHH380kszSpUtL3P+VDB061BTnz1FRy+Xtp4KmS38e8hR0fGEt3AKDJTRp0kQPPfSQZs2apePHj5dav4MGDbL/29nZWR07dpQxRo888oi9vVatWmrRooUOHDiQb/3o6GjVrFnT/vpf//qXAgICtHr1aknSzp07lZiYqPvvv1+nTp1SamqqUlNTde7cOd16663atGmTw+0SSXrssceKNfbVq1erc+fODrfJatSooUcffVSHDh3Sr7/+WrydUEwxMTFydXW1v77pppskKd9+qVatmoYMGWJ/7erqqiFDhujEiRPavn17qY6pJI4fP66dO3fq4YcfVp06deztbdu2VY8ePezH7FK7d+9WZGSkgoODtW7dOtWuXds+b+nSpbrppptUu3Zt+3FNTU1VVFSUcnJytGnTJoe++vfv77D+5fvPw8NDrq6u2rBhg/2WVXH1799f27dv1/79++1tixcvlpubm/r27StJ9is8a9eu1fnz50vUf3l69NFH9eWXXzpM7dq1q+hhoRIiAMEyXnrpJV28ePGKzwKVRKNGjRxee3t7y93dXT4+PvnaC/qj1KxZM4fXNptNISEh9mc2EhMTJUkDBw6Ur6+vw/T+++8rKysr3/MXjRs3LtbYk5KS1KJFi3ztrVq1ss8vTZfvq7w/5pfvl/r16+d7cLt58+aSVOSzLGUtb38Uts/ygumlbr/9dtWsWVNr166Vl5eXw7zExEStWbMm33GNioqS9NeD55e60v5zc3PTpEmT9MUXX8jPz0/dunXTG2+8oeTk5CvWds8998jJyUmLFy+WJBljtHTpUvXp08c+7saNGys2Nlbvv/++fHx81KtXL02fPr3SPf/TrFkzRUVFOUyXBkcgDwEIltGkSRM9+OCDhV4FKuzh3pycnEL7vPQtukW1SSryeZzC5F3dmTx5cr7/1eZNlz/TkvfsUGVTmvvlao5VRejXr5/279+vjz76KN+83Nxc9ejRo9Dj2q9fP4fli7P/Ro4cqYSEBE2YMEHu7u4aPXq0WrVqpR9//LHIcdavX1833XSTlixZIumvd1sdPnxY/fv3d1huypQp+vnnn/XCCy/owoULGj58uNq0aaOjR48Wa38AlQkPQcNSXnrpJS1cuFCTJk3KNy/vf4lpaWkO7aV9JeRSeVd48hhjtG/fPvvbdZs2bSpJ8vLysl8ZKC1BQUHau3dvvvbffvvNPr8i/P777/nevp+QkCBJ9gd5S3KsSvqutcLk7Y/C9pmPj0++K1eTJ09WtWrV9MQTT6hmzZq6//777fOaNm2qzMzMUj+uTZs21dNPP62nn35aiYmJat++vaZMmaKFCxcWuV7//v31xBNPaO/evVq8eLE8PT11++2351vu+uuv1/XXX6+XXnpJmzdvVteuXTVjxgy9+uqrpVoHUNa4AgRLadq0qR588EHNnDkz360BLy8v+fj45Hv24j//+U+ZjefDDz/U2bNn7a+XLVum48ePq0+fPpKk0NBQNW3aVG+++aYyMzPzrX/y5Mmr3vZtt92mbdu2acuWLfa2c+fOadasWQoODlbr1q2vuu+/4+LFi5o5c6b9dXZ2tmbOnClfX1+FhoZK+l8wvPRY5eTkaNasWfn6q169eqncpgkICFD79u01f/58h+C1e/du/fe//8334XvSX+Fr1qxZ+te//qWBAwdqxYoV9nn33nuvtmzZorVr1+ZbLy0tTRcvXizR+M6fP68//vjDoa1p06aqWbOmsrKyrrh+v3795OzsrEWLFmnp0qX65z//6RDoMjIy8o3p+uuvl5OTk0P/hw8ftodooDLjChAs58UXX9SCBQu0d+9etWnTxmHeoEGDNHHiRA0aNEgdO3bUpk2b7FcfykKdOnV04403KiYmRikpKZo6dapCQkI0ePBgSZKTk5Pef/999enTR23atFFMTIwaNGigY8eOaf369fLy8tLKlSuvatvPP/+8Fi1apD59+mj48OGqU6eO5s+fr4MHD+rTTz+Vk1PF/P+ofv36mjRpkg4dOqTmzZtr8eLF2rlzp2bNmmV/K3ObNm3UpUsXxcXF6fTp06pTp44++eSTAkNDaGioFi9erNjYWHXq1Ek1atQo8MpGcUyePFl9+vRReHi4HnnkEfvb4L29vR0+J+pSTk5OWrhwoe68807de++9Wr16tW655RaNGjVKK1as0D//+U89/PDDCg0N1blz57Rr1y4tW7ZMhw4dyvcsWVESEhJ066236t5771Xr1q1VrVo1ff7550pJSdF99913xfXr1aunm2++WW+99ZbOnj2b7/bXV199pWHDhumee+5R8+bNdfHiRS1YsEDOzs4Ot+uio6O1cePGq7q1CZQnAhAsJyQkRA8++KDmz5+fb96YMWN08uRJLVu2TEuWLFGfPn30xRdfqF69emUylhdeeEE///yzJkyYoLNnz+rWW2/Vf/7zH3l6etqX6d69u7Zs2aJXXnlF7777rjIzM+Xv76+wsDCHd0uVlJ+fnzZv3qznnntO06ZN0x9//KG2bdtq5cqV+sc//lEa5V2V2rVra/78+XryySc1e/Zs+fn56d1337WHwjwfffSRhgwZookTJ6pWrVp65JFHdPPNN6tHjx4Oyz3xxBPauXOn5s6dq7fffltBQUFXHYCioqK0Zs0ajR07VmPGjJGLi4siIyM1adKkIh8+d3Fx0bJly9SnTx/17dtX69atU1hYmDZu3KjXX39dS5cu1YcffigvLy81b95cL7/8cok/VycwMFADBgxQfHy8FixYoGrVqqlly5ZasmRJvueJCtO/f3+tW7dONWvWzHdFq127durVq5dWrlypY8eOydPTU+3atdMXX3yhLl26lGisQGVgM8R0AJVE9+7dlZqaqt27d1f0UABc43gGCAAAWA4BCAAAWA4BCAAAWA7PAAEAAMvhChAAALAcAhAAALAcPgeoALm5ufr9999Vs2bNUvsYfQAAULaMMTp79qzq169/xQ9zJQAV4Pfff1dgYGBFDwMAAFyFI0eOqGHDhkUuQwAqQM2aNSX9tQO9vLwqeDQAAKA4MjIyFBgYaP87XhQCUAHybnt5eXkRgAAAqGKK8/gKD0EDAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADL4aswAABAuTpwMlNJp88ruG51NfapXiFjIAABAIBykXY+W8MX7dSmxJP2tm7NfDVtQAd5e7qU61i4BQYAAMrF8EU79e2+VIe2b/el6slFP5b7WAhAAACgzB04malNiSeVY4xDe44x2pR4UgdTz5XreA
hAAACgzCWdPl/k/EOnCEAAAOAaE1THs8j5wXXL92FoAhAAAChzTXxrqFszXznbbA7tzjabujXzLfd3gxGAAABAuZg2oIO6hvg4tHUN8dG0AR3KfSy8DR4AAJQLb08XffhIZx1MPadDp87xOUAAAMA6GvtUXPDJwy0wAABgOQQgAABgOQQgAABgOZUiAE2fPl3BwcFyd3dXWFiYtm3bVuTyaWlpGjp0qAICAuTm5qbmzZtr9erVf6tPAABgHRUegBYvXqzY2FiNHTtWO3bsULt27dSrVy+dOHGiwOWzs7PVo0cPHTp0SMuWLdPevXs1e/ZsNWjQ4Kr7BAAA1mIz5rIv5ShnYWFh6tSpk959911JUm5urgIDA/Xkk0/q+eefz7f8jBkzNHnyZP32229ycSn4m2NL2uflMjIy5O3trfT0dHl5ef2N6gAAQHkpyd/vCr0ClJ2dre3btysqKsre5uTkpKioKG3ZsqXAdVasWKHw8HANHTpUfn5+uu666/T6668rJyfnqvvMyspSRkaGwwQAAK5dFRqAUlNTlZOTIz8/P4d2Pz8/JScnF7jOgQMHtGzZMuXk5Gj16tUaPXq0pkyZoldfffWq+5wwYYK8vb3tU2BgYClUBwAAKqsKfwaopHJzc1WvXj3NmjVLoaGh6t+/v1588UXNmDHjqvuMi4tTenq6fTpy5EgpjhgAAFQ2FfpJ0D4+PnJ2dlZKSopDe0pKivz9/QtcJyAgQC4uLnJ2dra3tWrVSsnJycrOzr6qPt3c3OTm5vY3qwEAAFVFhV4BcnV1VWhoqOLj4+1tubm5io+PV3h4eIHrdO3aVfv27VNubq69LSEhQQEBAXJ1db2qPgEAgLVU+C2w2NhYzZ49W/Pnz9eePXv0+OOP69y5c4qJiZEkRUdHKy4uzr78448/rtOnT2vEiBFKSEjQqlWr9Prrr2vo0KHF7hMAAFhbhX8Zav/+/XXy5EmNGTNGycnJat++vdasWWN/iPnw4cNycvpfTgsMDNTatWv11FNPqW3btmrQoIFGjBih5557rth9AgAAa6vwzwGqjPgcIAAAqp4q8zlAAAAAFYEABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdSBKDp06crODhY7u7uCgsL07Zt2wpddt68ebLZbA6Tu7u7wzIpKSl6+OGHVb9+fXl6eqp3795KTEws6zIAAEAVUeEBaPHixYqNjdXYsWO1Y8cOtWvXTr169dKJEycKXcfLy0vHjx+3T0lJSfZ5xhjdeeedOnDggJYvX64ff/xRQUFBioqK0rlz58qjJAAAUMlVeAB66623NHjwYMXExKh169aaMWOGPD09NWfOnELXsdls8vf3t09+fn72eYmJifruu+/03nvvqVOnTmrRooXee+89XbhwQYsWLSqPkgAAQCVXoQEoOztb27dvV1RUlL3NyclJUVFR2rJlS6HrZWZmKigoSIGBgerbt69++eUX+7ysrCxJcrgt5uTkJDc3N33zzTcF9peVlaWMjAyHCQAAXLsqNAClpqYqJyfH4QqOJPn5+Sk5ObnAdVq0aKE5c+Zo+fLlWrhwoXJzcxUREaGjR49Kklq2bKlGjRopLi5OZ86cUXZ2tiZNmqSjR4/q+PHjBfY5YcIEeXt726fAwMDSLRQAAFQqFX4LrKTCw8MVHR2t9u3bKzIyUp999pl8fX01c+ZMSZKLi4s+++wzJSQkqE6dOvL09NT69evVp08fOTkVXG5cXJzS09Pt05EjR8qzJAAAUM6qVeTGfXx85OzsrJSUFIf2lJQU+fv7F6sPFxcXdejQQfv27bO3hYaGaufOnUpPT1d2drZ8fX0VFhamjh07FtiHm5ub3Nzcrr4QAABQpVToFSBXV1eFhoYqPj7e3pabm6v4+HiFh4cXq4+cnBzt2rVLAQEB+eZ5e3vL19dXiYmJ+uGHH9S3b99SGzsAAKi6KvQKkCTFxsZq4MCB6tixozp37qypU6fq3LlziomJkSRFR0erQYMGmjBhgiRp/Pjx6tKli0JCQpSWlqbJkycrKSlJgwYNsve5dOlS+fr6qlGjRtq1a5dGjBihO++8Uz179qyQGgEAQOVS4QGof//+OnnypMaMGaPk5GS1b99ea9assT8YffjwYYdnd86cOaPBgwcrOTlZtWvXVmhoqDZv3qzWrVvblzl+/LhiY2OVkpKigIAARUdHa/To0eVeGwAAqJxsxhhT0YOobDIyMuTt7a309HR5eXlV9HAAAEAxlOTvd5V7FxgAAMDfRQACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWc1UBaMeOHdq1a5f99fLly3XnnXfqhRdeUHZ2don7mz59uoKDg+Xu7q6wsDBt27at0GXnzZsnm83mMLm7uzssk5mZqWHDhqlhw4by8PBQ69atNWPGjBKPCwAAXJuuKgANGTJECQkJkqQDBw7ovvvuk6enp5YuXapnn322RH0tXrxYsbGxGjt2rHbs2KF27dqpV69eOnHiRKHreHl56fjx4/YpKSnJYX5sbKzWrFmjhQsXas+ePRo5cqSGDRumFStWlLxYAABwzbmqAJSQkKD27dtLkpYuXapu3brp448/1rx58/Tpp5+WqK+33npLgwcPVkxMjP1Kjaenp+bMmVPoOjabTf7+/vbJz8/PYf7mzZs1c
OBAde/eXcHBwXr00UfVrl27Iq8sAQAA67iqAGSMUW5uriRp3bp1uu222yRJgYGBSk1NLXY/2dnZ2r59u6Kiov43ICcnRUVFacuWLYWul5mZqaCgIAUGBqpv37765ZdfHOZHRERoxYoVOnbsmIwxWr9+vRISEtSzZ88C+8vKylJGRobDBAAArl1XFYA6duyoV199VQsWLNDGjRv1j3/8Q5J08ODBfFdjipKamqqcnJx86/j5+Sk5ObnAdVq0aKE5c+Zo+fLlWrhwoXJzcxUREaGjR4/al5k2bZpat26thg0bytXVVb1799b06dPVrVu3AvucMGGCvL297VNgYGCxawAAAFXPVQWgqVOnaseOHRo2bJhefPFFhYSESJKWLVumiIiIUh3g5cLDwxUdHa327dsrMjJSn332mXx9fTVz5kz7MtOmTdN3332nFStWaPv27ZoyZYqGDh2qdevWFdhnXFyc0tPT7dORI0fKtAYAAFCxql3NSm3btnV4F1ieyZMny9nZ2f560aJFuuOOO1S9evUC+/Hx8ZGzs7NSUlIc2lNSUuTv71+ssbi4uKhDhw7at2+fJOnChQt64YUX9Pnnn9uvTLVt21Y7d+7Um2++6XC7LY+bm5vc3NyKtT0AAFD1lernALm7u8vFxcX+esiQIfnCzaVcXV0VGhqq+Ph4e1tubq7i4+MVHh5erG3m5ORo165dCggIkCT9+eef+vPPP+Xk5Fias7Oz/bklAABgbVd1Bai4jDFXXCY2NlYDBw5Ux44d1blzZ02dOlXnzp1TTEyMJCk6OloNGjTQhAkTJEnjx49Xly5dFBISorS0NE2ePFlJSUkaNGiQpL/eIh8ZGalRo0bJw8NDQUFB2rhxoz788EO99dZbZVcsAACoMso0ABVH//79dfLkSY0ZM0bJyclq37691qxZY38w+vDhww5Xc86cOaPBgwcrOTlZtWvXVmhoqDZv3qzWrVvbl/nkk08UFxenBx54QKdPn1ZQUJBee+01PfbYY+VeHwAAqHxspjiXaa5SzZo19dNPP6lJkyZltYkykZGRIW9vb6Wnp8vLy6uihwMAAIqhJH+/+S4wAABgOQQgAABgOWUagIKCghzeFQYAAFAZXFUAatKkiU6dOpWvPS0tzeF5n927d/OpygAAoNK5qgB06NAh5eTk5GvPysrSsWPH/vagAAAAylKJ3ga/YsUK+7/Xrl0rb29v++ucnBzFx8crODi41AYHAABQFkoUgO688077vwcOHOgwz8XFRcHBwZoyZUqpDAwAAKCsFDsA/fzzz/rzzz/l7Oysxo0b6/vvv5ePj09Zjg0AAKBMFPsZoA4dOuj06dOSJJvNJpvNVmaDAgAAKEvFDkC1atXSgQMHJElJSUl8sSgAAKiyin0LrF+/foqMjLR/63rHjh3l7Oxc4LJ5QQkAAKAyKnYAmjVrlu6++27t27dPw4cP1+DBg1WzZs2yHBsAAECZKNG7wHr37i1J2r59u0aMGEEAAgAAVdJVfRBiYQ9Anzt3Tv/+97//1oAAAADK2lUFoPnz5+vChQv52i9cuKAPP/zwbw8KAACgLJXoFlhGRoaMMTLG6OzZs3J3d7fPy8nJ0erVq1WvXr1SHyQAAEBpKlEAqlWrlv0zgJo3b55vvs1m08svv1xqgwMAACgLJQpA69evlzFGt9xyi5YtW6a6deva57m6uiooKEgXL14s9UECAACUphIFoMjISPu/w8PD7Z8JlOfUqVMKDAws8JviAQAAKoureghakqpVy5+dMjMzHZ4LAgAAqIxKdAUoNjZW0l/P+owePVqenp72eTk5Odq6davat29fqgMEAAAobSUKQD/++KMkyRijXbt2ydXV1T7P1dVV7dq10zPPPFO6IwQAAChlJX4IWpJiYmL0zjvvyMvLq0wGBQAAUJZKFIDyzJ07t7THAQAAUG6u+iFoAACAqooABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdSBKDp06crODhY7u7uCgsL07Zt2wpddt68ebLZbA6Tu7u7wzKXz8+bJk+eXNalAACAKqDCA9DixYsVGxursWPHaseOHWrXrp169eqlEydOFLqOl5eXjh8/bp+SkpIc5l867/jx45ozZ45sNpv69etX1uUAAIAqoMID0FtvvaXBgwcrJiZGrVu31owZM+Tp6ak5c+YUuo7NZpO/v7998vPzc5h/6Tx/f38tX75cN998s5o0aVLW5QAAgCqgQgNQdna2tm/frqioKHubk5OToqKitGXLlkLXy8zMVFBQkAIDA9W3b1/98ssvhS6bkpKiVatW6ZFHHil0maysLGVkZDhMAADg2lWhASg1NVU5OTn5ruD4+fkpOTm5wHVatGihOXPmaPny5Vq4cKFyc3MVERGho0ePFrj8/PnzVbNmTd19992FjmPChAny9va2T4GBgVdfFAAAqPQq/BZYSYWHhys6Olrt27dXZGSkPvvsM/n6+mrmzJkFLj9nzhw98MAD+R6UvlRcXJzS09Pt05EjR8pq+AAAoBKoVpEb9/HxkbOzs1JSUhzaU1JS5O/vX6w+XFxc1KFDB+3bty/fvK+//lp79+7V4sWLi+zDzc1Nbm5uxR84AACo0ir0CpCrq6tCQ0MVHx9vb8vNzVV8fLzCw8OL1UdOTo527dqlgICAfPM++OADhYaGql27dqU2ZgAAUPVV6BUgSYqNjdXAgQPVsWNHde7cWVOnTtW5c+cUExMjSYqOjlaDBg00YcIESdL48ePVpUsXhYSEKC0tTZMnT1ZSUpIGDRrk0G9GRoaWLl2qKVOmlHtNAACgcqvwANS/f3+dPHlSY8aMUXJystq3b681a9bYH4w+fPiwnJz+d6HqzJkzGjx4sJKTk1W7dm2FhoZq8+bNat26tUO/n3zyiYwxGjBgQLnWAwAAKj+bMcZU9CAqm4yMDHl7eys9PV1eXl4VPRwAAFAMJfn7XeXeBQYAAPB3EYAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAA
AIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlVIoANH36dAUHB8vd3V1hYWHatm1bocvOmzdPNpvNYXJ3d8+33J49e3THHXfI29tb1atXV6dOnXT48OGyLAMAAFQRFR6AFi9erNjYWI0dO1Y7duxQu3bt1KtXL504caLQdby8vHT8+HH7lJSU5DB///79uvHGG9WyZUtt2LBBP//8s0aPHl1gUAIAANZjM8aYihxAWFiYOnXqpHfffVeSlJubq8DAQD355JN6/vnn8y0/b948jRw5UmlpaYX2ed9998nFxUULFiy4qjFlZGTI29tb6enp8vLyuqo+AABA+SrJ3+8KvQKUnZ2t7du3Kyoqyt7m5OSkqKgobdmypdD1MjMzFRQUpMDAQPXt21e//PKLfV5ubq5WrVql5s2bq1evXqpXr57CwsL0f//3f4X2l5WVpYyMDIcJAABcuyo0AKWmpionJ0d+fn4O7X5+fkpOTi5wnRYtWmjOnDlavny5Fi5cqNzcXEVEROjo0aOSpBMnTigzM1MTJ05U79699d///ld33XWX7r77bm3cuLHAPidMmCBvb2/7FBgYWLqFAgCASqVaRQ+gpMLDwxUeHm5/HRERoVatWmnmzJl65ZVXlJubK0nq27evnnrqKUlS+/bttXnzZs2YMUORkZH5+oyLi1NsbKz9dUZGBiEIAIBrWIUGIB8fHzk7OyslJcWhPSUlRf7+/sXqw8XFRR06dNC+ffvsfVarVk2tW7d2WK5Vq1b65ptvCuzDzc1Nbm5uV1EBAACoiir0Fpirq6tCQ0MVHx9vb8vNzVV8fLzDVZ6i5OTkaNeuXQoICLD32alTJ+3du9dhuYSEBAUFBZXe4AEAQJVV4bfAYmNjNXDgQHXs2FGdO3fW1KlTde7cOcXExEiSoqOj1aBBA02YMEGSNH78eHXp0kUhISFKS0vT5MmTlZSUpEGDBtn7HDVqlPr3769u3brp5ptv1po1a7Ry5Upt2LChIkoEAACVTIUHoP79++vkyZMaM2aMkpOT1b59e61Zs8b+YPThw4fl5PS/C1VnzpzR4MGDlZycrNq1ays0NFSbN292uOV11113acaMGZowYYKGDx+uFi1a6NNPP9WNN95Y7vUBAIDKp8I/B6gy4nOAAACoeqrM5wABAABUBAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwnGoVPQCrOXAyU0mnzyu4bnU19qle0cMBAMCSCEDlJO18toYv2qlNiSftbd2a+WragA7y9nSpwJEBAGA93AIrJ8MX7dS3+1Id2r7dl6onF/1YQSMCAMC6CEDl4MDJTG1KPKkcYxzac4zRpsSTOph6roJGBgCANRGAykHS6fNFzj90igAEAEB5IgCVg6A6nkXOD67Lw9AAAJQnAlA5aOJbQ92a+crZZnNod7bZ1K2ZL+8GAwCgnBGAysm0AR3UNcTHoa1riI+mDehQQSMCAMC6eBt8OfH2dNGHj3TWwdRzOnTqHJ8DBABABSIAlbPGPgQfAAAqGrfAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5fBVGAUwxkiSMjIyKngkAACguPL+buf9HS8KAagAZ8+elSQFBgZW8EgAAEBJnT17Vt7e3kUuYzPFiUkWk5ubq99//101a9aUzWYr1b4zMjIUGBioI0eOyMvLq1T7rmyo9dplpXqp9dplpXqtUqsxRmfPnlX9+vXl5FT0Uz5cASqAk5OTGjZsWKbb8PLyuqZ/CC9FrdcuK9VLrdcuK9VrhVqvdOUnDw9BAwAAyyEAAQAAyyEAlTM3NzeNHTtWbm5uFT2UMket1y4r1Uut1y4r1WulWouLh6ABAIDlcAUIAABYDgEIAABYDgEIAABYDgEIAABYDgGoBI4dO6YHH3xQdevWlYeHh66//nr98MMP9vnjxo1Ty5YtVb16ddWuXVtRUVHaunWrQx/BwcGy2WwO08SJE4vc7h9//KGhQ4eqbt26qlGjhvr166eUlJQyqTHP3611w4YN+erMm77//vtCt9u9e/d8yz/22GNlWqt05Xov9dhjj8lms2nq1KkO7adPn9YDDzwgLy8v1apVS4888ogyMzOL3G5lPLaXKqjWQ4cO6ZFHHlHjxo3l4eGhpk2bauzYscrOzi5yuxVxbEvjuFaVc1b6+/VWpfP2SrU+/PDD+cbUu3dvhz6ulXP2SrVWpXO2PPFJ0MV05swZde3aVTfffLO++OIL+fr6KjExUbVr17Yv07x5c7377rtq0qSJLly4oLfffls9e/bUvn375Ovra19u/PjxGjx4sP11zZo1i9z2U089pVWrVmnp0qXy9vbWsGHDdPfdd+vbb78t/UJVOrVGRETo+PHjDv2OHj1a8fHx6tixY5HbHzx4sMaPH29/7enpWboFXqY49eb5/PPP9d1336l+/fr55j3wwAM6fvy4vvzyS/3555+KiYnRo48+qo8//rjQbVfGY5unsFp/++035ebmaubMmQoJCdHu3bs1ePBgnTt3Tm+++WaR2y/PY1tax1Wq/OesVDr1VpXztri19u7dW3PnzrW/vvwt4NfSOVtUrVXlnC13BsXy3HPPmRtvvLFE66SnpxtJZt26dfa2oKAg8/bbbxe7j7S0NOPi4mKWLl1qb9uzZ4+RZLZs2VKi8RRXadV6qezsbOPr62vGjx9fZD+RkZFmxIgRJdr231Xceo8ePWoaNGhgdu/ene84/vrrr0aS+f777+1tX3zxhbHZbObYsWMF9leZj21RtRbkjTfeMI0bNy5ymfI+tqVVa1U4Z40pm2NbWc/b4tQ6cOBA07dv30LnX0vn7JVqLUhlPGfLG7fAimnFihXq2LGj7rnnHtWrV08dOnTQ7NmzC10+Oztbs2bNkre3t9q1a+cwb+LEiapbt646dOigyZMn6+LFi4X2s337dv3555+Kioqyt7Vs2VKNGjXSli1b/n5hBSjNWi/t89SpU4qJibni9j/66CP5+PjouuuuU1xcnM6fP3/VtRRHcerNzc3VQw89pFGjRqlNmzb5+tiyZYtq1arl8L/kqKgoOTk55bsNmqeyHtsr1VqQ9PR01alT54rLleexLc1aK/s5K5XNsa2s521xf0dt2LBB9erVU4sWLfT444/r1KlT9nnX0jkrFV1rQSrjOVvuKjqBVRVubm7Gzc3NxMXFmR07dpiZM2cad3d3M2/ePIflVq5caapXr25sNpupX7++2bZtm8P8KVOmmPXr15uffvrJvPfee6Z
WrVrmqaeeKnS7H330kXF1dc3X3qlTJ/Pss8+WTnGXKa1aL9WnTx/Tp0+fK2575syZZs2aNebnn382CxcuNA0aNDB33XXX366pKMWp9/XXXzc9evQwubm5xpj8VwVee+0107x583x9+/r6mv/85z8FbreyHtsr1Xq5xMRE4+XlZWbNmlXktsv72JZWrVXhnDWmbI5tZT1vi1ProkWLzPLly83PP/9sPv/8c9OqVSvTqVMnc/HiRWPMtXXOXqnWy1XWc7a8EYCKycXFxYSHhzu0Pfnkk6ZLly4ObZmZmSYxMdFs2bLF/Pvf/zbBwcEmJSWl0H4/+OADU61aNfPHH38UOL8iTrjSrvXIkSPGycnJLFu2rMRjiY+PN5LMvn37SrxucV2p3h9++MH4+fk5XBavqgGoNGq91NGjR03Tpk3NI488UuKxlPWxLe1a81TGc9aY0q+3Mp+3xf0ddan9+/c73Ka/Vs7Zglxe66Uq8zlb3rgFVkwBAQFq3bq1Q1urVq10+PBhh7bq1asrJCREXbp00QcffKBq1arpgw8+KLTfsLAwXbx4UYcOHSpwvr+/v7Kzs5WWlubQnpKSIn9//6uq5UpKu9a5c+eqbt26uuOOO0o8lrCwMEnSvn37SrxucV2p3q+//lonTpxQo0aNVK1aNVWrVk1JSUl6+umnFRwcLOmv43TixAmHPi5evKjTp08Xepwq47EtTq15fv/9d918882KiIjQrFmzSjyWsj62pVnr5eOubOesVPr1Vubztri/oy7VpEkT+fj42Md0rZyzBbm81jyV/ZwtbwSgYuratav27t3r0JaQkKCgoKAi18vNzVVWVlah83fu3CknJyfVq1evwPmhoaFycXFRfHy8vW3v3r06fPiwwsPDS1BB8ZVmrcYYzZ07V9HR0XJxcSnxWHbu3Cnpr18CZeVK9T700EP6+eeftXPnTvtUv359jRo1SmvXrpUkhYeHKy0tTdu3b7f38dVXXyk3N9f+S+NylfHYFqdW6a+35Xbv3l2hoaGaO3eunJxK/qukrI9tadVa0Lgr2zkrlW69lf28vZrfUUePHtWpU6fsY7pWztmCXF6rVDXO2XJX0Zegqopt27aZatWqmddee80kJiaajz76yHh6epqFCxcaY/66HRQXF2e2bNliDh06ZH744QcTExNj3NzczO7du40xxmzevNm8/fbbZufOnWb//v1m4cKFxtfX10RHR9u3c/ToUdOiRQuzdetWe9tjjz1mGjVqZL766ivzww8/mPDw8HyXRCtbrXnWrVtnJJk9e/bk287lte7bt8+MHz/e/PDDD+bgwYNm+fLlpkmTJqZbt25lVmtx6i1IQbcOevfubTp06GC2bt1qvvnmG9OsWTMzYMAA+/yqcGwLcnmtR48eNSEhIebWW281R48eNcePH7dPly5T0ce2NGqtKuesMaX3c2xM5T9vr1Tr2bNnzTPPPGO2bNliDh48aNatW2duuOEG06xZM4dbl9fCOVucWqvKOVveCEAlsHLlSnPdddcZNzc307JlS4cHyC5cuGDuuusuU79+fePq6moCAgLMHXfc4fBg8Pbt201YWJjx9vY27u7uplWrVub11193OCEPHjxoJJn169c79P3EE0+Y2rVrG09PT3PXXXc5/OBWxlrzDBgwwERERBS4jctrPXz4sOnWrZupU6eOcXNzMyEhIWbUqFEmPT29TGq8VFH1FqSgPxynTp0yAwYMMDVq1DBeXl4mJibGnD171j6/Khzbglxe69y5c42kAqc8leXY/t1aq9I5a0zp/BwbUzXO26JqPX/+vOnZs6fx9fU1Li4uJigoyAwePNgkJyc79HEtnLPFqbUqnbPlyWaMMeV5xQkAAKCi8QwQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQgL+te/fuGjlyZLlv9+GHH9add95Z7tstT8HBwZo6dWpFDwO45hCAAFRZ77zzjubNm1fu2503b55q1apVonUIMkDlUq2iBwAABcnJyZHNZivySxu9vb3LcUQAriVcAQKuId27d9fw4cP17LPPqk6dOvL399e4ceMkSYcOHZLNZrN/o7MkpaWlyWazacOGDZKkDRs2yGazae3aterQoYM8PDx0yy236MSJE/riiy/UqlUreXl56f7779f58+cdtn3x4kUNGzZM3t7e8vHx0ejRo3XpN+1kZWXpmWeeUYMGDVS9enWFhYXZtyv976rKihUr1Lp1a7m5uenw4cNF1nv5LbCi6s9js9n03nvvqU+fPvLw8FCTJk20bNky+/y8fZCWlmZv27lzp2w2mw4dOqQNGzYoJiZG6enpstlsstls+bZxue7duyspKUlPPfWUfZ08n376qdq0aSM3NzcFBwdrypQpRfb1/vvvq1atWvZvJN+9e7f69OmjGjVqyM/PTw899JBSU1OLvU+MMRo3bpwaNWokNzc31a9fX8OHDy9yDMA1oWK/igxAaYqMjDReXl5m3LhxJiEhwcyfP9/YbDbz3//+1/5lhz/++KN9+TNnzjh8AeL69euNJNOlSxfzzTffmB07dpiQkBATGRlpevbsaXbs2GE2bdpk6tatayZOnOiw3Ro1apgRI0aY3377zSxcuNB4eno6fGnjoEGDTEREhNm0aZPZt2+fmTx5snFzczMJCQnGmL++sNHFxcVERESYb7/91vz222/m3LlzRdY7cOBA07dv32LVn0eSqVu3rpk9e7bZu3eveemll4yzs7P59ddfHfbBmTNn7Ov8+OOPRpI5ePCgycrKMlOnTjVeXl72b9S+9As0C3Lq1CnTsGFDM378eIdv4f7hhx+Mk5OTGT9+vNm7d6+ZO3eu8fDwMHPnzrWve+kXlk6aNMnUrVvX/o3dZ86cMb6+viYuLs7s2bPH7Nixw/To0cPcfPPNxd4nS5cuNV5eXmb16tUmKSnJbN269YpfogpcCwhAwDUkMjLS3HjjjQ5tnTp1Ms8991yJAtC6devsy0yYMMFIMvv377e3DRkyxPTq1cthu61atTK5ubn2tueee860atXKGGNMUlKScXZ2NseOHXMY26233mri4uKMMf/7xuqdO3cWu96CAlBh9eeRZB577DGHZcLCwszjjz/usA8KC0B5Y/X29i72OI0p+JvX77//ftOjRw+HtlGjRpnWrVvnW+/ZZ581AQEBZvfu3fZ5r7zyiunZs6fD+keOHDGSzN69e40xV94nU6ZMMc2bNzfZ2dklqgeo6rgFBlxj2rZt6/A6ICBAJ06cuOo+/Pz85OnpqSZNmji0Xd5nly5dHG7thIeHKzExUTk5Odq1a5dycnLUvHlz1ahRwz5t3LhR+/fvt6/j6uqab/wlVZz6w8PD873es2fP39ru1dizZ4+6du3q0Na1a1f7fsszZcoUzZ49W998843atGljb//pp5+0fv16h33asmVLSXLYr0Xtk3vuuUcXLlxQkyZNNHjwYH3++ee6ePFiqdcKVDY8BA1cY1xcXBxe22w25ebm2h8mNpc8l/Pnn39esQ+bzVZon8WVmZkpZ2dnbd
++Xc7Ozg7zatSoYf+3h4eHQ4i6Gn93rCXZT+Xlpptu0qpVq7RkyRI9//zz9vbMzEzdfvvtmjRpUr51AgIC7P8uap8EBgZq7969Wrdunb788ks98cQTmjx5sjZu3JhvPeBaQgACLMLX11eSdPz4cXXo0EGSHB6I/ru2bt3q8Pq7775Ts2bN5OzsrA4dOignJ0cnTpzQTTfdVGrbvFrfffedoqOjHV7n7ZNL91Pt2rUl5d9Prq6uDldoiqOgdVq1aqVvv/3Woe3bb79V8+bNHYJi586dNWzYMPXu3VvVqlXTM888I0m64YYb9Omnnyo4OFjVql39r3MPDw/dfvvtuv322zV06FC1bNlSu3bt0g033HDVfQKVHbfAAIvw8PBQly5dNHHiRO3Zs0cbN27USy+9VGr9Hz58WLGxsdq7d68WLVqkadOmacSIEZKk5s2b64EHHlB0dLQ+++wzHTx4UNu2bdOECRO0atWqUhtDcS1dulRz5sxRQkKCxo4dq23btmnYsGGSpJCQEAUGBmrcuHFKTEzUqlWr8r0zKzg4WJmZmYqPj1dqamq+d8QVJDg4WJs2bdKxY8fs79J6+umnFR8fr1deeUUJCQmaP3++3n33XXvAuVRERIRWr16tl19+2f55QkOHDtXp06c1YMAAff/999q/f7/Wrl2rmJiYYge0efPm6YMPPtDu3bt14MABLVy4UB4eHgoKCirW+kBVRQACLGTOnDm6ePGiQkNDNXLkSL366qul1nd0dLQuXLigzp07a+jQoRoxYoQeffRR+/y5c+cqOjpaTz/9tFq0aKE777xT33//vRo1alRqYyiul19+WZ988onatm2rDz/8UIsWLVLr1q0l/XW7aNGiRfrtt9/Utm1bTZo0Kd9+ioiI0GOPPab+/fvL19dXb7zxxhW3OX78eB06dEhNmza1X2W64YYbtGTJEn3yySe67rrrNGbMGI0fP14PP/xwgX3ceOONWrVqlV566SVNmzZN9evX17fffqucnBz17NlT119/vUaOHKlatWoV+flJl6pVq5Zmz56trl27qm3btlq3bp1WrlypunXrFmt9oKqymUtvdAPANc5ms+nzzz+/5r9CA0DRuAIEAAAshwAEoNK69O3dl09ff/11RQ/P7uuvvy5yrAAqH26BAai09u3bV+i8Bg0ayMPDoxxHU7gLFy7o2LFjhc4PCQkpx9EAKA4CEAAAsBxugQEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMv5//KcgIuL/M9kAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "final_df = pd.DataFrame()\n", + "final_df[\"number_input_tokens\"] = valid_df[\"number_input_tokens\"]\n", + "final_df[\"number_output_tokens\"] = valid_df[\"number_output_tokens\"]\n", + "final_df[\"ttft_s\"] = valid_df[\"ttft_s\"]\n", + "final_df[\"end_to_end_latency_s\"] = valid_df[\"end_to_end_latency_s\"]\n", + "final_df[\"generation_throughput\"] = valid_df[\"request_output_throughput_token_per_s\"]\n", + "\n", + "mean_tokens_in = final_df[\"number_input_tokens\"].mean()\n", + "mean_tokens_out = valid_df[\"number_output_tokens\"].mean()\n", + "print(f\"Mean number of input tokens: {mean_tokens_in}. Mean number of output tokens: {mean_tokens_out}\")\n", + "final_df.plot.scatter(x=\"number_input_tokens\", y=\"ttft_s\", title=\"Number of Input Tokens vs. TTFT\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a14de79c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGzCAYAAADT4Tb9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvAklEQVR4nO3de1TUVb/H8c+AAl7wUioocsRILTO19EiEVhaJ6aHMfLyVIpkdE08m6VNmiaaJWZKdMk3zkqdj+pSXWqWWktbpaI8nL1k9XlIjvIGQFxQTEPb5w+U8TWDCODCwfb/WmrWaPXv/5rt31HzW77d/Mw5jjBEAAIAlfLxdAAAAgCcRbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuABTjcDg0cuRIb5dxVXA4HJo4caK3ywCsQrgBLOFwOEr12Lhxo7dLLZO77rpLbdq08cixNm3apIkTJ+rkyZMeOR6AyqmatwsA4Bn/9V//5fJ88eLFWrduXbH2G2+8sSLLqlQ2bdqkSZMmaciQIapXr563y5Ek/fbbb6pWjf8VA57Ef1GAJR555BGX5998843WrVtXrB2VS0BAgLdLAKzDZSngKpKbm6unn35aoaGh8vf3V6tWrfTqq6/KGHPZsVOmTJGPj4/eeOMNZ9uaNWvUpUsX1apVS4GBgerZs6d+/PFHl3FDhgxR7dq1dfjwYfXq1Uu1a9dWw4YNNWbMGBUWFnpkXjt37tSQIUN03XXXKSAgQMHBwXr00Uf166+/OvtMnDhRY8eOlSQ1b97ceZkuLS3N2ee9995Thw4dVKNGDV1zzTXq37+/Dh486PJeFy+T/eMf/1DXrl1Vs2ZNhYSEaPr06cXqOnfunCZOnKiWLVsqICBAjRs3Vu/evbV//35nn5L23Bw+fFiPPvqogoKC5O/vr5tuukkLFiwodvw33nhDN910k2rWrKn69eurY8eOWrJkiTtLCFiFMzfAVcIYo/vvv18bNmzQ0KFD1b59e3322WcaO3asDh8+rNdee+2SY59//nlNnTpVb7/9toYNGybpwmWwuLg4xcTE6OWXX9bZs2c1e/Zsde7cWdu3b1dYWJhzfGFhoWJiYhQREaFXX31V69ev14wZMxQeHq4nnnjiiue2bt06HThwQPHx8QoODtaPP/6ouXPn6scff9Q333wjh8Oh3r17a+/evXr//ff12muvqUGDBpKkhg0bSpJeeuklvfDCC+rbt68ee+wxZWVl6Y033tAdd9yh7du3u1zGOnHihLp3767evXurb9+++vDDD/XMM8/o5ptv1n333eec87/9278pNTVV/fv316hRo3T69GmtW7dOP/zwg8LDw0ucS2Zmpm677Tbnpu6GDRtqzZo1Gjp0qHJycvTUU09JkubNm6cnn3xSffr00ahRo3Tu3Dnt3LlTf//73zVw4MArXlOgSjMArJSQkGB+/5/4qlWrjCQzZcoUl359+vQxDofD7Nu3z9kmySQkJBhjjHn66aeNj4+PWbRokfP106dPm3r16plhw4a5HCsjI8PUrVvXpT0uLs5IMi+++KJL31tuucV06NDhsvO48847zU033fSnfc6ePVus7f333zeSzFdffeVse+WVV4wk8/PPP7v0TUtLM76+vuall15yaf/+++9NtWrVXNrvvPNOI8ksXrzY2ZaXl2eCg4PNQw895GxbsGCBkWRSUlKK1VZUVOT8Z0kmKSnJ+Xzo0KGmcePGJjs722VM//79Td26dZ1zfeCBBy67LsDVistSwFVi9erV8vX11ZNPPunS/vTTT8sYozVr1ri0G2M0cuRIvf7663rvvfcUFxfnfG3dunU6efKkBgwYoOzsbOfD19dXERER2rBhQ7H3Hz58uMvzLl266MCBAx6ZW40aNZz/fO7cOWVnZ+u2226TJG3btu2y41esWKGioiL17dvXZT7BwcFq0aJFsfnUrl3bZS+Tn5+fOnXq5DKf5cuXq0GDBvqP//iPYu/ncDhKrMMYo+XLlys2NlbGGJdaYmJidOrUKed86tWrp0OHDun//u//Ljs/4GrDZSngKvHLL7+oSZMmCgwMdGm/ePfUL7/84tK+ePFinTlzRrNnz9aAAQNcXvvpp58kSXfffXeJ71WnTh2X5wEBAc7LPxfVr19fJ06cKPtESnD8+HFNmjRJS5cu1bFjx1xeO3Xq1GXH//TTTzLGqEWLFiW+Xr16dZfnTZs2LRZQ6tevr507dzqf79+/X61atSrTnVBZWVk6efKk5s6dq7lz55bY5+L8nnnmGa1fv16dOnXS9ddfr27dumngwIGKiooq9fsBtiLcAChRVFSUduzYoTfffFN9+/bVNddc43ytqKhI0oV9N8HBwcXG/vED3dfXt1xr7du3rzZt2qSxY8e
[... base64-encoded PNG omitted: histogram titled 'Token Latencies', produced by the source cell below ...]",
+ "text/plain": [
+ "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "all_token_latencies = valid_df['end_to_end_latency_s'].apply(pd.Series).stack()\n", + "all_token_latencies = all_token_latencies.reset_index(drop=True)\n", + "all_token_latencies.plot.hist(title=\"Token Latencies\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/env_sample.txt b/env_sample.txt deleted file mode 100644 index f2d6a4e..0000000 --- a/env_sample.txt +++ /dev/null @@ -1,19 +0,0 @@ -#Anyscale Endpoint -ANYSCALE_API_BASE="https://console.endpoints.anyscale.com/m/v1" -ANYSCALE_API_KEY="secret_xxxxx" - -#OpenAI Endpoint -OPENAI_API_BASE="https://api.openai.com/v1" -OPENAI_API_KEY="sk-xxxxx" - -#Fireworks Endpoint -FIREWORKS_API_BASE="https://api.fireworks.ai/inference/v1" -FIREWORKS_API_KEY="xxxxx" - -#vLLM Endpoint -VLLM_API_BASE="https://localhost/v1" -VLLM_API_KEY="xxxxx' - -# Huggingface Text Generation Inference -TGI_API_BASE="http://localhost:8001" -TGI_API_KEY="enter key" diff --git a/llm_correctness.py b/llm_correctness.py new file mode 100644 index 0000000..c9d102d --- /dev/null +++ b/llm_correctness.py @@ -0,0 +1,309 @@ +import argparse +import json +import os +from pathlib import Path +import random +import re +import time +from typing import Any, Dict, List, Optional, Tuple + +import num2words +import ray +from tqdm import tqdm + +from llmperf import common_metrics +from llmperf.common import SUPPORTED_APIS, construct_clients +from llmperf.models import RequestConfig +from llmperf.requests_launcher import RequestsLauncher +from llmperf.utils import ( + LLMPerfResults, +) + +MAX_RANDOM_NUMBER = 10000 + + +def llm_correctness( + model: str, + additional_sampling_params: Optional[Dict[str, Any]] = None, + num_concurrent_requests: int = 1, + max_num_completed_requests: int = 500, + test_timeout_s=90, + llm_api="chat", +) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: + """Get the token throughput and latencies for the given model. + + Args: + model: The name of the model to query. + additional_sampling_params: Additional sampling parameters to send with the request. + For more information see the LLM APIs documentation for the completions + num_concurrent_requests: The number of concurrent requests to make. Increase + this to increase the amount of load and vice versa. + test_timeout_s: The amount of time to run the test for before reporting results. + llm_api: The type of request to make. Either "chat" or "litellm". + + Returns: + A tuple containing summary metrics and raw results from the test. 
+
+ """
+
+ if not additional_sampling_params:
+ additional_sampling_params = {}
+
+ clients = construct_clients(llm_api=llm_api, num_clients=num_concurrent_requests)
+ req_launcher = RequestsLauncher(clients)
+ start_time = time.monotonic()
+
+ num_errored_requests = 0
+ num_mismatched_requests = 0
+ num_completed_requests = 0
+
+ sampling_params = {"temperature": 0.0}
+ sampling_params.update(additional_sampling_params)
+ completed_requests = []
+ num_launched_requests = 0
+ pbar = tqdm(total=max_num_completed_requests)
+ while (
+ time.monotonic() - start_time < test_timeout_s
+ and num_completed_requests < max_num_completed_requests
+ ):
+ num_launched_requests += 1
+ rnd_number = random.randint(0, MAX_RANDOM_NUMBER)
+ rnd_num_words = num2words.num2words(rnd_number)
+
+ prompt = f"Convert the following sequence of words into a number: {rnd_num_words}.\nPrint the number first."
+
+ request_config = RequestConfig(
+ model=model,
+ prompt=(prompt, 0),
+ sampling_params=sampling_params,
+ metadata={"rnd_number": rnd_number},
+ llm_api=llm_api,
+ )
+ req_launcher.launch_requests(request_config)
+
+ # Drain finished requests once per batch of launched requests.
+ if not (num_launched_requests % num_concurrent_requests):
+ completed_requests.extend(req_launcher.get_next_ready())
+ pbar.update(len(completed_requests) - num_completed_requests)
+ num_completed_requests = len(completed_requests)
+
+ pbar.close()
+ end_time = time.monotonic()
+ if end_time - start_time >= test_timeout_s:
+ print("Test timed out before all requests could be completed.")
+
+ raw_results = []
+
+ print("Mismatched and errored requests:")
+ for out in completed_requests:
+ metrics, generated_text, completed_request_config = out
+
+ raw_results.append(
+ {
+ "metrics": metrics,
+ "generated_text": generated_text,
+ "request_config": dict(completed_request_config),
+ }
+ )
+
+ # If the request did not error, check whether the expected number was generated.
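+ # The check strips thousands separators between digits (e.g. "1,234" -> "1234")
+ # and then looks for the expected integer among all numbers in the generated text.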
+ if not metrics[common_metrics.ERROR_CODE]:
+ try:
+ commas_between_numbers_re = r"(\d+),(?=\d)"
+ gen_text_commas_removed = re.sub(
+ commas_between_numbers_re, r"\1", generated_text
+ )
+ nums = re.findall(r"\d+", gen_text_commas_removed)
+ generated_text = gen_text_commas_removed.replace("\n", " ")
+
+ assert str(completed_request_config.metadata["rnd_number"]) in nums
+ except Exception:
+ num_mismatched_requests += 1
+ print(
+ f" mismatched request: {generated_text}, expected: {completed_request_config.metadata['rnd_number']}"
+ )
+ else:
+ num_errored_requests += 1
+ print(
+ f" The request errored: {metrics[common_metrics.ERROR_CODE]}, "
+ f"{metrics[common_metrics.ERROR_MSG]} "
+ )
+ print()
+
+ # Guard against a division by zero when no requests complete before the timeout.
+ error_rate = num_errored_requests / num_completed_requests if num_completed_requests else 0.0
+ mismatch_rate = num_mismatched_requests / num_completed_requests if num_completed_requests else 0.0
+ num_non_errored_requests = num_completed_requests - num_errored_requests
+ summary_metrics = {}
+ summary_metrics[common_metrics.NUM_ERRORS] = num_errored_requests
+ summary_metrics["num_mismatched_requests"] = num_mismatched_requests
+ summary_metrics["error_rate"] = error_rate
+ summary_metrics["mismatch_rate"] = mismatch_rate
+ summary_metrics[common_metrics.NUM_COMPLETED_REQUESTS] = num_completed_requests
+ summary_metrics["num_non_errored_requests"] = num_non_errored_requests
+
+ # Metadata
+ summary_metrics["model"] = model
+ summary_metrics["num_concurrent_requests"] = num_concurrent_requests
+ summary_metrics["additional_sampling_params"] = additional_sampling_params
+ summary_metrics["llm_api"] = llm_api
+
+ return summary_metrics, raw_results
+
+
+def run(
+ llm_api: str,
+ model: str,
+ test_timeout_s: int,
+ max_num_completed_requests: int,
+ num_concurrent_requests: int,
+ additional_sampling_params: str,
+ results_dir: str,
+ user_metadata: Dict[str, str],
+):
+ """
+ Args:
+ llm_api: The LLM API to send requests to. One of the APIs listed in
+ SUPPORTED_APIS (e.g. "openai" or "litellm").
+ model: The name of the model to query.
+ max_num_completed_requests: The number of requests to complete before finishing the test.
+ test_timeout_s: The amount of time to run the test for before reporting results.
+ num_concurrent_requests: The number of concurrent requests to make. Increase
+ this to increase the amount of load and vice versa.
+ additional_sampling_params: A JSON string of additional sampling parameters to
+ send with each request. For more information see the LLM API's
+ documentation for completions.
+ results_dir: The directory to save the results to.
+ user_metadata: Additional metadata to attach to the saved results.
+
+ """
+
+ summary_metrics, raw_results = llm_correctness(
+ model=model,
+ llm_api=llm_api,
+ test_timeout_s=test_timeout_s,
+ max_num_completed_requests=max_num_completed_requests,
+ num_concurrent_requests=num_concurrent_requests,
+ additional_sampling_params=json.loads(additional_sampling_params),
+ )
+
+ time.sleep(2)
+
+ print(
+ f"Results for llm correctness test for {model} queried with the {llm_api} api."
+ )
+ print(
+ f"Errors: {summary_metrics[common_metrics.NUM_ERRORS]}, "
+ f"Error rate: {summary_metrics['error_rate']}"
+ )
+
+ print(
+ f"Mismatched: {summary_metrics['num_mismatched_requests']}, "
+ f"Mismatch rate: {summary_metrics['mismatch_rate']}"
+ )
+ print(f"Completed: {summary_metrics[common_metrics.NUM_COMPLETED_REQUESTS]}")
+ print(f"Completed without errors: {summary_metrics['num_non_errored_requests']}")
+
+ if results_dir:
+ file_name = f"{model}_correctness"
+ file_name = re.sub(r"[^\w\d-]+", "-", file_name)
+ file_name = re.sub(r"-{2,}", "-", file_name)
+ summary_file_name = f"{file_name}_summary"
+ individual_responses_filename = f"{file_name}_individual_responses"
+ summary_metrics.update(user_metadata)
+ results = LLMPerfResults(name=summary_file_name, metadata=summary_metrics)
+ results_dir = Path(results_dir)
+ if not results_dir.exists():
+ results_dir.mkdir(parents=True)
+ elif not results_dir.is_dir():
+ raise ValueError(f"{results_dir} is not a directory")
+ with open(results_dir / f"{summary_file_name}.json", "w") as f:
+ json.dump(results.to_dict(), f, indent=4)
+ with open(results_dir / f"{individual_responses_filename}.json", "w") as f:
+ json.dump(raw_results, f, indent=4)
+
+
+parser = argparse.ArgumentParser(description="Run a correctness test for a given model.")
+
+parser.add_argument(
+ "--model", type=str, required=True, help="The model to use for this load test."
+)
+parser.add_argument(
+ "--num-concurrent-requests",
+ type=int,
+ default=10,
+ help=("The number of concurrent requests to send. (default: %(default)s)"),
+)
+parser.add_argument(
+ "--timeout",
+ type=int,
+ default=90,
+ help="The amount of time to run the load test for, in seconds. (default: %(default)s)",
+)
+parser.add_argument(
+ "--max-num-completed-requests",
+ type=int,
+ default=50,
+ help=(
+ "The number of requests to complete before finishing the test. Note "
+ "that it's possible for the test to time out first. (default: %(default)s)"
+ ),
+)
+parser.add_argument(
+ "--additional-sampling-params",
+ type=str,
+ default="{}",
+ help=(
+ "Additional sampling params to send with each request to the LLM API. "
+ "By default no additional sampling params are sent. (default: %(default)s)"
+ ),
+)
+parser.add_argument(
+ "--results-dir",
+ type=str,
+ default="",
+ help=(
+ "The directory to save the results to. By default no results "
+ "are saved. (default: %(default)s)"
+ ),
+)
+parser.add_argument(
+ "--llm-api",
+ type=str,
+ default="openai",
+ help=(
+ f"The type of request to make. The supported LLM APIs are {SUPPORTED_APIS}. "
+ "(default: %(default)s)"
+ ),
+)
+parser.add_argument(
+ "--metadata",
+ type=str,
+ default="",
+ help=(
+ "A comma-separated list of metadata to include in the results, e.g. "
+ "name=foo,bar=1. These will be added to the metadata field of the results."
+ ),
+)
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+
+ env_vars = dict(os.environ)
+ ray.init(runtime_env={"env_vars": env_vars})
+ # Parse user metadata.
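+ # e.g. --metadata "name=foo,bar=1" becomes {"name": "foo", "bar": "1"}.
+ # Values are kept as strings and are merged into the summary metrics
+ # before the results are written out.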
+ user_metadata = {} + if args.metadata: + for item in args.metadata.split(","): + key, value = item.split("=") + user_metadata[key] = value + + run( + llm_api=args.llm_api, + model=args.model, + test_timeout_s=args.timeout, + max_num_completed_requests=args.max_num_completed_requests, + num_concurrent_requests=args.num_concurrent_requests, + additional_sampling_params=args.additional_sampling_params, + results_dir=args.results_dir, + user_metadata=user_metadata, + ) diff --git a/llmperf.py b/llmperf.py deleted file mode 100644 index 8ae30a2..0000000 --- a/llmperf.py +++ /dev/null @@ -1,484 +0,0 @@ -import argparse -from collections import defaultdict -import ray, openai -from num2words import num2words -import time, os, sys, re, json, datetime -import random -from dotenv import load_dotenv -import pandas as pd -from transformers import LlamaTokenizerFast -from huggingface_hub import InferenceClient - -FRAMEWORKS = [ - "anyscale", - "openai", - "fireworks", - "vertexai", - "sagemaker", - "perplexity", - "together", - "vllm", - "tgi" -] - -os.environ["TOKENIZERS_PARALLELISM"] = "true" - -# TODO(mwk): We use one tokenizer for all models, but we should -# consider using each framework's tokenizer - -# TODO(mwk): too much dependence on args globally. Clean up methods to not directly -# read from args to facilitate writing scripts. - -tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer") -sys_prompt = "You are a helpful assistant that respeonds with the answer in the most concise possible way." - - -class LineIterator: - """ - A helper class for parsing the byte stream input. - Reference: https://aws.amazon.com/blogs/machine-learning/elevating-the-generative-ai-experience-introducing-streaming-support-in-amazon-sagemaker-hosting/ - """ - - def __init__(self, stream): - self.byte_iterator = iter(stream) - self.buffer = io.BytesIO() - self.read_pos = 0 - self.ttft = 0 - - def __iter__(self): - return self - - def __next__(self): - while True: - self.buffer.seek(self.read_pos) - line = self.buffer.readline() - if line and line[-1] == ord("\n"): - if self.ttft == 0: - self.ttft = time.time() - self.read_pos += len(line) - return line[:-1], self.ttft, time.time() - # kyle: dealing with last ']' for chat output - if line and self.read_pos == self.buffer.getbuffer().nbytes - 1: - self.read_pos += 1 - return line, self.ttft, time.time() - try: - chunk = next(self.byte_iterator) - except StopIteration: - if self.read_pos < self.buffer.getbuffer().nbytes: - continue - raise - if "PayloadPart" not in chunk: - print("Unknown event type:" + chunk) - continue - self.buffer.seek(0, io.SEEK_END) - self.buffer.write(chunk["PayloadPart"]["Bytes"]) - - -# NOTE: The defaults are set to mirror our production traffic -def prompt_generator(num_digits=3, min_lines=15, max_lines=1000, file_lines=[]) -> str: - # Step 1: Generate a random number - # Generate the number of digits specified (e.g. if NUM_DIGITS = 3, then - # any number between 100 and 1000 is OK). - rnd_num = random.randrange(10 ** (num_digits - 1), 10 ** (num_digits)) - max_lines = max_lines if max_lines < len(file_lines) else len(file_lines) - rnd_num_lines = random.randrange(min_lines, max_lines) - rnd_picked_lines = "\n".join(random.sample(file_lines, rnd_num_lines)) - - # Step 2: convert to words. - rnd_num_words = num2words(rnd_num) - - # Step 3: convert to a prompt - user_prompt = f"Convert the following sequence of words into a number: {rnd_num_words}.\nPrint the number first. 
Then pick {args.req_lines} lines from these poem lines:\n{rnd_picked_lines}" - - return user_prompt, rnd_num - - -@ray.remote(num_cpus=0.001) -def validate(ep_config, sample_lines): - # The 4 is for the end and start tokens of the messages - prompt, rnd_num = prompt_generator( - args.num_digits, args.min_lines, args.max_lines, sample_lines - ) - tokens_in = len(tokenizer.encode(prompt)) + len(tokenizer.encode(sys_prompt)) + 4 - words = "" - id = None - st = et = ttft = 0 - if ep_config["framework"] in [ - "anyscale", - "openai", - "fireworks", - "perplexity", - "vllm", - ]: - messages = [ - {"role": "system", "content": sys_prompt}, - {"role": "user", "content": prompt}, - ] - try: - st = time.time() - response = openai.ChatCompletion.create( - model=ep_config["model"], - messages=messages, - api_key=ep_config["api_key"], - api_base=ep_config["api_base"], - max_tokens=args.max_tokens, - # Please keep temp at 0. Otherwise increases the number of mismatches. - temperature=0, - # Do not set to false. You will get bogus results. - stream=True, - ) - for tok in response: - id = tok.id - if tok.choices[0].delta: - delta = tok.choices[0].delta - if "content" in delta: - if ttft == 0: - ttft = time.time() - st - words += delta["content"] - et = time.time() - except Exception as e: - return ("Exception", -1, -1, -1, -1, str(e), "") - elif ep_config["framework"] == "together": - try: - st = time.time() - url = ep_config["api_base"] - payload = { - "model": ep_config["model"], - "prompt": sys_prompt + prompt, - "max_tokens": args.max_tokens, - "temperature": 0, - "stream_tokens": True, - } - headers = { - "accept": "application/json", - "content-type": "application/json", - "Authorization": f"Bearer {ep_config['api_key']}", - } - response = requests.post(url, json=payload, headers=headers) - response.raise_for_status() - client = sseclient.SSEClient(response) - for event in client.events(): - if ttft == 0: - ttft = time.time() - st - if event.data == "[DONE]": - break - partial_result = json.loads(event.data) - words += partial_result["choices"][0]["text"] - et = time.time() - except Exception as e: - return ("Exception", -1, -1, -1, -1, str(e), "") - elif ep_config["framework"] == "vertexai": - chat_model = ChatModel.from_pretrained(ep_config["model"]) - chat = chat_model.start_chat( - context=sys_prompt, - ) - try: - st = time.time() - responses = chat.send_message_streaming( - message=prompt, - temperature=0, - max_output_tokens=args.max_tokens, - ) - results = [] - for response in responses: - if ttft == 0: - ttft = time.time() - st - results.append(str(response)) - words = "".join(results) - - et = time.time() - except Exception as e: - return ("Exception", -1, -1, -1, -1, str(e), "") - - elif ep_config["framework"] == "sagemaker": - sm_runtime = boto3.client("sagemaker-runtime", region_name=ep_config["region"]) - message = { - "inputs": [ - [ - {"role": "system", "content": sys_prompt}, - {"role": "user", "content": prompt}, - ] - ], - "parameters": { - "max_new_tokens": args.max_tokens, - ## we can't set temperature to 0 in SM - "temperature": 0.01, - }, - } - try: - st = time.time() - response = sm_runtime.invoke_endpoint_with_response_stream( - EndpointName=ep_config["endpoint_name"], - ContentType="application/json", - Body=json.dumps(message), - CustomAttributes="accept_eula=true", - ) - event_stream = response["Body"] - json_byte = b"" - for line, ttft, et in LineIterator(event_stream): - json_byte += line - resp = json.loads(json_byte) - ttft = ttft - st - words = 
resp[0]["generation"]["content"] - et = time.time() - except Exception as e: - return ("Exception", -1, -1, -1, -1, str(e), "") - elif ep_config["framework"] == "tgi": - - model = ep_config["model"] if ep_config["api_base"] is None else ep_config["api_base"] - api_key = ep_config["api_key"] - client = InferenceClient(model=model, token=api_key) - query = f"[INST] {sys_prompt} {prompt} [/INST]" - try: - st = time.time() - response = client.text_generation(query, max_new_tokens=args.max_tokens, temperature=.1, stream=True) - for tok in response: - words += tok - if ttft == 0: - ttft = time.time() - st - et = time.time() - except Exception as e: - return ("Exception", -1, -1, -1, -1, str(e), "") - - # Get rid of commas. - tokens_out = len(tokenizer.encode(words)) - nums = re.findall(r"\d+", words) - if len(nums) > 0: - retval = int(nums[0]) - valid = "OK" - cause = "" - if retval != rnd_num: - valid = "Mismatch" - cause = f"Input = {rnd_num} output = {retval}\n.Output:\n {words}" - else: - valid = "Mismatch" - cause = f"Output unparseable. Input = {rnd_num}. Output:\n {words}" - return (valid, ttft, et - st, tokens_in, tokens_out, cause, id) - - -def endpoint_evaluation(ep_config, sample_lines): - query_results = [] - overall_start_time = time.time() - num_rounds = int(args.total_requests / args.concur_requests) - for i in range(num_rounds): - print(f"Starting round {i}") - st = time.time() - futures = [ - validate.remote(ep_config, sample_lines) - for _ in range(args.concur_requests) - ] - results = ray.get(futures) - query_results.extend(results) - et = time.time() - elt = et - st - tosleep = args.sleep - elt - if tosleep > 0: - print("Sleeping for %.4f seconds" % tosleep) - time.sleep(tosleep) - else: - print(f"No need to sleep for the next round") - print(f"Round {i} complete") - overall_end_time = time.time() - print(f"Overall execution time {overall_end_time-overall_start_time}") - return query_results - - -def results_analysis(query_results, results_dict): - df = pd.DataFrame( - query_results, - columns=[ - "valid", - "ttft", - "total_time", - "tokens_in", - "tokens_out", - "cause", - "id", - ], - ) - ts = int(time.time()) - fn = f'{results_dict["framework"]}-{ts}_raw.json' - df.to_json(fn) - print(f"Results saved to: {fn}") - - print("Validity results:") - print(df["valid"].value_counts()) - - value_counts = df["valid"].value_counts() - results_dict["num_valid"] = int(value_counts.get("OK", 0)) - results_dict["num_exceptions"] = int(value_counts.get("Exception", 0)) - results_dict["num_mismatch"] = int(value_counts.get("Mismatch", 0)) - results_dict["valid_rate"] = float( - results_dict["num_valid"] / results_dict["total_requests"] - ) - results_dict["mismatch_rate"] = float( - results_dict["num_mismatch"] / results_dict["total_requests"] - ) - results_dict["exception_rate"] = float( - results_dict["num_exceptions"] / results_dict["total_requests"] - ) - cdf = df[df.valid != "Exception"].copy() - print(f"Clean DF is: {len(cdf)}") - if len(cdf) > 0: - cdf["total_tokens_per_s"] = (cdf.tokens_out + cdf.tokens_in) / cdf.total_time - cdf["out_tokens_per_s"] = cdf.tokens_out / cdf.total_time - cdf["inter_tokens_delay"] = cdf.total_time / cdf.tokens_out - mean_e2e = cdf["total_time"].mean() - mean_tokens_in = cdf["tokens_in"].mean() - mean_tokens_out = cdf["tokens_out"].mean() - mean_ttft = cdf["ttft"].mean() - max_ttft = cdf["ttft"].max() - gt_3_ttft = len(cdf[cdf["ttft"] > 3]) / len(cdf) - print(f"Mean End-to-end: {mean_e2e*1000.0:.0f} ms") - print( - f"Mean TTFT: {mean_ttft*1000:.0f} ms 
(mean tokens in: {mean_tokens_in:.0f}, out: {mean_tokens_out:.0f})" - ) - print(f"Max TTFT: {max_ttft*1000:.0f} ms") - print(f"TTFT > 3 s: {gt_3_ttft*100:.2f}%") - print( - f"ITL (out): {cdf.inter_tokens_delay.mean()*1000:.2f} ms/token, mean tokens/s output (out): {cdf.out_tokens_per_s.mean():.2f} token/s" - ) - # Put things in a dictionary and save the results - results_dict["end_timestamp"] = datetime.datetime.fromtimestamp(ts).isoformat() - results_dict["total_time"] = float(cdf.total_time.mean()) - results_dict["mean_ttft"] = int(f"{mean_ttft*1000:.0f}") - results_dict["mean_tokens_in"] = mean_tokens_in - results_dict["mean_tokens_out"] = mean_tokens_out - results_dict["total_tokens_per_s"] = float(cdf.total_tokens_per_s.mean()) - results_dict["out_tokens_per_s"] = float(cdf.out_tokens_per_s.mean()) - results_dict["inter_token_delay"] = float(cdf.inter_tokens_delay.mean() * 1000) - - def error_analysis(df): - # Group exceptions based on exceptions cause. - exceptions = df[df.valid == "Exception"] - exceptions_by_cause = defaultdict(int) - # Ideally we should group by some error code - for cause in exceptions["cause"]: - exceptions_by_cause[cause] += 1 - print("Exceptions by cause:") - for cause, count in exceptions_by_cause.items(): - print(f" - {count}: {cause}") - - error_analysis(df) - results_dict["raw_output"] = fn - benchmark_result = f"{results_dict['framework']}-{ts}.json" - - with open(benchmark_result, "w") as fw: - fw.write(json.dumps(results_dict)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-f", "--framework", type=str, default="anyscale", help="Test frame name" - ) - parser.add_argument( - "-m", - "--model", - type=str, - default="meta-llama/Llama-2-70b-chat-hf", - help="model name", - ) - parser.add_argument( - "--random-lines-file-name", - type=str, - default="sonnet.txt", - help="Prompt sample file name", - ) - parser.add_argument("--min-lines", type=int, default=15, help="min number of lines") - parser.add_argument("--max-lines", type=int, default=50, help="max number of lines") - parser.add_argument( - "--req-lines", - type=int, - default=7, - help="Number of lines to request in prompt. Affects tokens out.", - ) - parser.add_argument( - "--num-digits", type=int, default=3, help="number of digits for mismatch search" - ) - parser.add_argument( - "--sleep", - type=int, - default=0, - help="sleep between rounds of requests (to deal with rate limiting)", - ) - parser.add_argument( - "-c", - "--concur-requests", - type=int, - default=10, - help="number of concurrent requests", - ) - parser.add_argument( - "-r", "--total-requests", type=int, default=300, help="number of total requests" - ) - parser.add_argument( - "--max-tokens", - type=int, - default=384, - help="Upper limit on the number of returned tokens to prevent 'runaway LLMs'.", - ) - parser.add_argument( - "--random-seed", - type=int, - default=117, - help="Random seed to standardize results. 
By default fully random.", - ) - args = parser.parse_args() - load_dotenv() - endpoint_config = {} - if args.random_seed >= 0: - random.seed(args.random_seed) - if args.framework not in FRAMEWORKS: - print(f"Choose a framework in {FRAMEWORKS}") - sys.exit(0) - elif args.framework == "anyscale": - endpoint_config["api_base"] = os.environ["ANYSCALE_API_BASE"] - endpoint_config["api_key"] = os.environ["ANYSCALE_API_KEY"] - elif args.framework == "openai": - endpoint_config["api_base"] = os.environ["OPENAI_API_BASE"] - endpoint_config["api_key"] = os.environ["OPENAI_API_KEY"] - elif args.framework == "fireworks": - endpoint_config["api_base"] = os.environ["FIREWORKS_API_BASE"] - endpoint_config["api_key"] = os.environ["FIREWORKS_API_KEY"] - elif args.framework == "perplexity": - endpoint_config["api_base"] = os.environ["PERPLEXITY_API_BASE"] - endpoint_config["api_key"] = os.environ["PERPLEXITY_API_KEY"] - elif args.framework == "together": - import requests, sseclient - - endpoint_config["api_base"] = os.environ["TOGETHER_API_BASE"] - endpoint_config["api_key"] = os.environ["TOGETHER_API_KEY"] - elif args.framework == "vertexai": - import vertexai - from vertexai.preview.language_models import ChatModel - - endpoint_config["api_base"] = "VertexAI Endpoint" - endpoint_config["project_id"] = os.environ["VERTEXAI_PROJECT_ID"] - vertexai.init(project=endpoint_config["project_id"]) - elif args.framework == "sagemaker": - import boto3 - - endpoint_config["api_base"] = "SageMaker Endpoint" - endpoint_config["region"] = os.environ["SAGEMAKER_REGION"] - endpoint_config["endpoint_name"] = os.environ["SAGEMAKER_ENDPOINT_NAME"] - elif args.framework == "vllm": - endpoint_config["api_base"] = os.environ["VLLM_API_BASE"] - endpoint_config["api_key"] = os.environ["VLLM_API_KEY"] - elif args.framework == "tgi": - endpoint_config["api_base"]=os.environ["TGI_API_BASE"] - endpoint_config["api_key"]=os.environ["TGI_API_KEY"] - - endpoint_config["framework"] = args.framework - endpoint_config["model"] = args.model - - f = open(args.random_lines_file_name, "r") - sample_lines = f.readlines() - f.close() - - ## Endpoint evaluation - query_results = endpoint_evaluation(endpoint_config, sample_lines) - - ## Results Analysis - args.api_base = endpoint_config["api_base"] - results_analysis(query_results, vars(args)) diff --git a/optional.txt b/optional.txt deleted file mode 100644 index 3d5578f..0000000 --- a/optional.txt +++ /dev/null @@ -1,2 +0,0 @@ -boto3 -google-cloud-aiplatform diff --git a/pre-commit.sh b/pre-commit.sh new file mode 100755 index 0000000..0eb053b --- /dev/null +++ b/pre-commit.sh @@ -0,0 +1,5 @@ +#!/bin/bash +echo "Running pre-hooks before committing..." + +echo "======FORMAT=====" +black . 
-q diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7687fb2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["setuptools>=43.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "LLMPerf" +version = "0.1.0" +description = "A framework for load testing LLM APIs" +authors = [{name="Avnish Narayan", email="avnish@anyscale.com"}] +license = {text= "Apache-2.0"} +requires-python = ">=3.8, <3.11" +dependencies = ["pydantic<2.5", + "ray", + "pytest>=6.0", + "seaborn>=0.11", + "awscli>=1.22", + "typer>=0.4", + "litellm>=0.1.738", + "num2words", + "transformers", + "tqdm", + "boto3", + "google-cloud-aiplatform"] diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..4960786 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +# For lints +black \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 573f8bf..0000000 --- a/requirements.txt +++ /dev/null @@ -1,18 +0,0 @@ -openai -num2words -python_dotenv -pandas -tiktoken -transformers==4.33.3 -matplotlib -scikit-learn -ray[default] -tokenizers==0.13.3 -huggingface-hub -### Extra dependency for Together -#requests -#sseclient-py -### Extra dependency for Vertex AI -#google-cloud-aiplatform -### Extra dependency for SageMaker -#boto3 diff --git a/sonnet.txt b/sonnet.txt deleted file mode 100644 index 34c444e..0000000 --- a/sonnet.txt +++ /dev/null @@ -1,518 +0,0 @@ -FROM fairest creatures we desire increase, -That thereby beauty's rose might never die, -But as the riper should by time decease, -His tender heir might bear his memory: -But thou, contracted to thine own bright eyes, -Feed'st thy light'st flame with self-substantial fuel, -Making a famine where abundance lies, -Thyself thy foe, to thy sweet self too cruel. -Thou that art now the world's fresh ornament -And only herald to the gaudy spring, -Within thine own bud buriest thy content -And, tender churl, makest waste in niggarding. -Pity the world, or else this glutton be, -To eat the world's due, by the grave and thee. -When forty winters shall beseige thy brow, -And dig deep trenches in thy beauty's field, -Thy youth's proud livery, so gazed on now, -Will be a tatter'd weed, of small worth held: -Then being ask'd where all thy beauty lies, -Where all the treasure of thy lusty days, -To say, within thine own deep-sunken eyes, -Were an all-eating shame and thriftless praise. -How much more praise deserved thy beauty's use, -If thou couldst answer 'This fair child of mine -Shall sum my count and make my old excuse,' -Proving his beauty by succession thine! -This were to be new made when thou art old, -And see thy blood warm when thou feel'st it cold. -Look in thy glass, and tell the face thou viewest -Now is the time that face should form another; -Whose fresh repair if now thou not renewest, -Thou dost beguile the world, unbless some mother. -For where is she so fair whose unear'd womb -Disdains the tillage of thy husbandry? -Or who is he so fond will be the tomb -Of his self-love, to stop posterity? -Thou art thy mother's glass, and she in thee -Calls back the lovely April of her prime: -So thou through windows of thine age shall see -Despite of wrinkles this thy golden time. -But if thou live, remember'd not to be, -Die single, and thine image dies with thee. -Unthrifty loveliness, why dost thou spend -Upon thyself thy beauty's legacy? 
-Nature's bequest gives nothing but doth lend, -And being frank she lends to those are free. -Then, beauteous niggard, why dost thou abuse -The bounteous largess given thee to give? -Profitless usurer, why dost thou use -So great a sum of sums, yet canst not live? -For having traffic with thyself alone, -Thou of thyself thy sweet self dost deceive. -Then how, when nature calls thee to be gone, -What acceptable audit canst thou leave? -Thy unused beauty must be tomb'd with thee, -Which, used, lives th' executor to be. -Those hours, that with gentle work did frame -The lovely gaze where every eye doth dwell, -Will play the tyrants to the very same -And that unfair which fairly doth excel: -For never-resting time leads summer on -To hideous winter and confounds him there; -Sap cheque'd with frost and lusty leaves quite gone, -Beauty o'ersnow'd and bareness every where: -Then, were not summer's distillation left, -A liquid prisoner pent in walls of glass, -Beauty's effect with beauty were bereft, -Nor it nor no remembrance what it was: -But flowers distill'd though they with winter meet, -Leese but their show; their substance still lives sweet. -Then let not winter's ragged hand deface -In thee thy summer, ere thou be distill'd: -Make sweet some vial; treasure thou some place -With beauty's treasure, ere it be self-kill'd. -That use is not forbidden usury, -Which happies those that pay the willing loan; -That's for thyself to breed another thee, -Or ten times happier, be it ten for one; -Ten times thyself were happier than thou art, -If ten of thine ten times refigured thee: -Then what could death do, if thou shouldst depart, -Leaving thee living in posterity? -Be not self-will'd, for thou art much too fair -To be death's conquest and make worms thine heir. -Lo! in the orient when the gracious light -Lifts up his burning head, each under eye -Doth homage to his new-appearing sight, -Serving with looks his sacred majesty; -And having climb'd the steep-up heavenly hill, -Resembling strong youth in his middle age, -yet mortal looks adore his beauty still, -Attending on his golden pilgrimage; -But when from highmost pitch, with weary car, -Like feeble age, he reeleth from the day, -The eyes, 'fore duteous, now converted are -From his low tract and look another way: -So thou, thyself out-going in thy noon, -Unlook'd on diest, unless thou get a son. -Music to hear, why hear'st thou music sadly? -Sweets with sweets war not, joy delights in joy. -Why lovest thou that which thou receivest not gladly, -Or else receivest with pleasure thine annoy? -If the true concord of well-tuned sounds, -By unions married, do offend thine ear, -They do but sweetly chide thee, who confounds -In singleness the parts that thou shouldst bear. -Mark how one string, sweet husband to another, -Strikes each in each by mutual ordering, -Resembling sire and child and happy mother -Who all in one, one pleasing note do sing: -Whose speechless song, being many, seeming one, -Sings this to thee: 'thou single wilt prove none.' -Is it for fear to wet a widow's eye -That thou consumest thyself in single life? -Ah! if thou issueless shalt hap to die. -The world will wail thee, like a makeless wife; -The world will be thy widow and still weep -That thou no form of thee hast left behind, -When every private widow well may keep -By children's eyes her husband's shape in mind. 
-Look, what an unthrift in the world doth spend -Shifts but his place, for still the world enjoys it; -But beauty's waste hath in the world an end, -And kept unused, the user so destroys it. -No love toward others in that bosom sits -That on himself such murderous shame commits. -For shame! deny that thou bear'st love to any, -Who for thyself art so unprovident. -Grant, if thou wilt, thou art beloved of many, -But that thou none lovest is most evident; -For thou art so possess'd with murderous hate -That 'gainst thyself thou stick'st not to conspire. -Seeking that beauteous roof to ruinate -Which to repair should be thy chief desire. -O, change thy thought, that I may change my mind! -Shall hate be fairer lodged than gentle love? -Be, as thy presence is, gracious and kind, -Or to thyself at least kind-hearted prove: -Make thee another self, for love of me, -That beauty still may live in thine or thee. -As fast as thou shalt wane, so fast thou growest -In one of thine, from that which thou departest; -And that fresh blood which youngly thou bestowest -Thou mayst call thine when thou from youth convertest. -Herein lives wisdom, beauty and increase: -Without this, folly, age and cold decay: -If all were minded so, the times should cease -And threescore year would make the world away. -Let those whom Nature hath not made for store, -Harsh featureless and rude, barrenly perish: -Look, whom she best endow'd she gave the more; -Which bounteous gift thou shouldst in bounty cherish: -She carved thee for her seal, and meant thereby -Thou shouldst print more, not let that copy die. -When I do count the clock that tells the time, -And see the brave day sunk in hideous night; -When I behold the violet past prime, -And sable curls all silver'd o'er with white; -When lofty trees I see barren of leaves -Which erst from heat did canopy the herd, -And summer's green all girded up in sheaves -Borne on the bier with white and bristly beard, -Then of thy beauty do I question make, -That thou among the wastes of time must go, -Since sweets and beauties do themselves forsake -And die as fast as they see others grow; -And nothing 'gainst Time's scythe can make defence -Save breed, to brave him when he takes thee hence. -O, that you were yourself! but, love, you are -No longer yours than you yourself here live: -Against this coming end you should prepare, -And your sweet semblance to some other give. -So should that beauty which you hold in lease -Find no determination: then you were -Yourself again after yourself's decease, -When your sweet issue your sweet form should bear. -Who lets so fair a house fall to decay, -Which husbandry in honour might uphold -Against the stormy gusts of winter's day -And barren rage of death's eternal cold? -O, none but unthrifts! Dear my love, you know -You had a father: let your son say so. -Not from the stars do I my judgment pluck; -And yet methinks I have astronomy, -But not to tell of good or evil luck, -Of plagues, of dearths, or seasons' quality; -Nor can I fortune to brief minutes tell, -Pointing to each his thunder, rain and wind, -Or say with princes if it shall go well, -By oft predict that I in heaven find: -But from thine eyes my knowledge I derive, -And, constant stars, in them I read such art -As truth and beauty shall together thrive, -If from thyself to store thou wouldst convert; -Or else of thee this I prognosticate: -Thy end is truth's and beauty's doom and date. 
-When I consider every thing that grows -Holds in perfection but a little moment, -That this huge stage presenteth nought but shows -Whereon the stars in secret influence comment; -When I perceive that men as plants increase, -Cheered and cheque'd even by the self-same sky, -Vaunt in their youthful sap, at height decrease, -And wear their brave state out of memory; -Then the conceit of this inconstant stay -Sets you most rich in youth before my sight, -Where wasteful Time debateth with Decay, -To change your day of youth to sullied night; -And all in war with Time for love of you, -As he takes from you, I engraft you new. -But wherefore do not you a mightier way -Make war upon this bloody tyrant, Time? -And fortify yourself in your decay -With means more blessed than my barren rhyme? -Now stand you on the top of happy hours, -And many maiden gardens yet unset -With virtuous wish would bear your living flowers, -Much liker than your painted counterfeit: -So should the lines of life that life repair, -Which this, Time's pencil, or my pupil pen, -Neither in inward worth nor outward fair, -Can make you live yourself in eyes of men. -To give away yourself keeps yourself still, -And you must live, drawn by your own sweet skill. -Who will believe my verse in time to come, -If it were fill'd with your most high deserts? -Though yet, heaven knows, it is but as a tomb -Which hides your life and shows not half your parts. -If I could write the beauty of your eyes -And in fresh numbers number all your graces, -The age to come would say 'This poet lies: -Such heavenly touches ne'er touch'd earthly faces.' -So should my papers yellow'd with their age -Be scorn'd like old men of less truth than tongue, -And your true rights be term'd a poet's rage -And stretched metre of an antique song: -But were some child of yours alive that time, -You should live twice; in it and in my rhyme. -Shall I compare thee to a summer's day? -Thou art more lovely and more temperate: -Rough winds do shake the darling buds of May, -And summer's lease hath all too short a date: -Sometime too hot the eye of heaven shines, -And often is his gold complexion dimm'd; -And every fair from fair sometime declines, -By chance or nature's changing course untrimm'd; -But thy eternal summer shall not fade -Nor lose possession of that fair thou owest; -Nor shall Death brag thou wander'st in his shade, -When in eternal lines to time thou growest: -So long as men can breathe or eyes can see, -So long lives this and this gives life to thee. -Devouring Time, blunt thou the lion's paws, -And make the earth devour her own sweet brood; -Pluck the keen teeth from the fierce tiger's jaws, -And burn the long-lived phoenix in her blood; -Make glad and sorry seasons as thou fleets, -And do whate'er thou wilt, swift-footed Time, -To the wide world and all her fading sweets; -But I forbid thee one most heinous crime: -O, carve not with thy hours my love's fair brow, -Nor draw no lines there with thine antique pen; -Him in thy course untainted do allow -For beauty's pattern to succeeding men. -Yet, do thy worst, old Time: despite thy wrong, -My love shall in my verse ever live young. 
-A woman's face with Nature's own hand painted -Hast thou, the master-mistress of my passion; -A woman's gentle heart, but not acquainted -With shifting change, as is false women's fashion; -An eye more bright than theirs, less false in rolling, -Gilding the object whereupon it gazeth; -A man in hue, all 'hues' in his controlling, -Much steals men's eyes and women's souls amazeth. -And for a woman wert thou first created; -Till Nature, as she wrought thee, fell a-doting, -And by addition me of thee defeated, -By adding one thing to my purpose nothing. -But since she prick'd thee out for women's pleasure, -Mine be thy love and thy love's use their treasure. -So is it not with me as with that Muse -Stirr'd by a painted beauty to his verse, -Who heaven itself for ornament doth use -And every fair with his fair doth rehearse -Making a couplement of proud compare, -With sun and moon, with earth and sea's rich gems, -With April's first-born flowers, and all things rare -That heaven's air in this huge rondure hems. -O' let me, true in love, but truly write, -And then believe me, my love is as fair -As any mother's child, though not so bright -As those gold candles fix'd in heaven's air: -Let them say more than like of hearsay well; -I will not praise that purpose not to sell. -My glass shall not persuade me I am old, -So long as youth and thou are of one date; -But when in thee time's furrows I behold, -Then look I death my days should expiate. -For all that beauty that doth cover thee -Is but the seemly raiment of my heart, -Which in thy breast doth live, as thine in me: -How can I then be elder than thou art? -O, therefore, love, be of thyself so wary -As I, not for myself, but for thee will; -Bearing thy heart, which I will keep so chary -As tender nurse her babe from faring ill. -Presume not on thy heart when mine is slain; -Thou gavest me thine, not to give back again. -As an unperfect actor on the stage -Who with his fear is put besides his part, -Or some fierce thing replete with too much rage, -Whose strength's abundance weakens his own heart. -So I, for fear of trust, forget to say -The perfect ceremony of love's rite, -And in mine own love's strength seem to decay, -O'ercharged with burden of mine own love's might. -O, let my books be then the eloquence -And dumb presagers of my speaking breast, -Who plead for love and look for recompense -More than that tongue that more hath more express'd. -O, learn to read what silent love hath writ: -To hear with eyes belongs to love's fine wit. -Mine eye hath play'd the painter and hath stell'd -Thy beauty's form in table of my heart; -My body is the frame wherein 'tis held, -And perspective it is the painter's art. -For through the painter must you see his skill, -To find where your true image pictured lies; -Which in my bosom's shop is hanging still, -That hath his windows glazed with thine eyes. -Now see what good turns eyes for eyes have done: -Mine eyes have drawn thy shape, and thine for me -Are windows to my breast, where-through the sun -Delights to peep, to gaze therein on thee; -Yet eyes this cunning want to grace their art; -They draw but what they see, know not the heart. -Let those who are in favour with their stars -Of public honour and proud titles boast, -Whilst I, whom fortune of such triumph bars, -Unlook'd for joy in that I honour most. -Great princes' favourites their fair leaves spread -But as the marigold at the sun's eye, -And in themselves their pride lies buried, -For at a frown they in their glory die. 
-The painful warrior famoused for fight, -After a thousand victories once foil'd, -Is from the book of honour razed quite, -And all the rest forgot for which he toil'd: -Then happy I, that love and am beloved -Where I may not remove nor be removed. -Lord of my love, to whom in vassalage -Thy merit hath my duty strongly knit, -To thee I send this written embassage, -To witness duty, not to show my wit: -Duty so great, which wit so poor as mine -May make seem bare, in wanting words to show it, -But that I hope some good conceit of thine -In thy soul's thought, all naked, will bestow it; -Till whatsoever star that guides my moving -Points on me graciously with fair aspect -And puts apparel on my tatter'd loving, -To show me worthy of thy sweet respect: -Then may I dare to boast how I do love thee; -Till then not show my head where thou mayst prove me. -Weary with toil, I haste me to my bed, -The dear repose for limbs with travel tired; -But then begins a journey in my head, -To work my mind, when body's work's expired: -For then my thoughts, from far where I abide, -Intend a zealous pilgrimage to thee, -And keep my drooping eyelids open wide, -Looking on darkness which the blind do see -Save that my soul's imaginary sight -Presents thy shadow to my sightless view, -Which, like a jewel hung in ghastly night, -Makes black night beauteous and her old face new. -Lo! thus, by day my limbs, by night my mind, -For thee and for myself no quiet find. -How can I then return in happy plight, -That am debarr'd the benefit of rest? -When day's oppression is not eased by night, -But day by night, and night by day, oppress'd? -And each, though enemies to either's reign, -Do in consent shake hands to torture me; -The one by toil, the other to complain -How far I toil, still farther off from thee. -I tell the day, to please them thou art bright -And dost him grace when clouds do blot the heaven: -So flatter I the swart-complexion'd night, -When sparkling stars twire not thou gild'st the even. -But day doth daily draw my sorrows longer -And night doth nightly make grief's strength seem stronger. -When, in disgrace with fortune and men's eyes, -I all alone beweep my outcast state -And trouble deal heaven with my bootless cries -And look upon myself and curse my fate, -Wishing me like to one more rich in hope, -Featured like him, like him with friends possess'd, -Desiring this man's art and that man's scope, -With what I most enjoy contented least; -Yet in these thoughts myself almost despising, -Haply I think on thee, and then my state, -Like to the lark at break of day arising -From sullen earth, sings hymns at heaven's gate; -For thy sweet love remember'd such wealth brings -That then I scorn to change my state with kings. -When to the sessions of sweet silent thought -I summon up remembrance of things past, -I sigh the lack of many a thing I sought, -And with old woes new wail my dear time's waste: -Then can I drown an eye, unused to flow, -For precious friends hid in death's dateless night, -And weep afresh love's long since cancell'd woe, -And moan the expense of many a vanish'd sight: -Then can I grieve at grievances foregone, -And heavily from woe to woe tell o'er -The sad account of fore-bemoaned moan, -Which I new pay as if not paid before. -But if the while I think on thee, dear friend, -All losses are restored and sorrows end. 
-Thy bosom is endeared with all hearts, -Which I by lacking have supposed dead, -And there reigns love and all love's loving parts, -And all those friends which I thought buried. -How many a holy and obsequious tear -Hath dear religious love stol'n from mine eye -As interest of the dead, which now appear -But things removed that hidden in thee lie! -Thou art the grave where buried love doth live, -Hung with the trophies of my lovers gone, -Who all their parts of me to thee did give; -That due of many now is thine alone: -Their images I loved I view in thee, -And thou, all they, hast all the all of me. -If thou survive my well-contented day, -When that churl Death my bones with dust shall cover, -And shalt by fortune once more re-survey -These poor rude lines of thy deceased lover, -Compare them with the bettering of the time, -And though they be outstripp'd by every pen, -Reserve them for my love, not for their rhyme, -Exceeded by the height of happier men. -O, then vouchsafe me but this loving thought: -'Had my friend's Muse grown with this growing age, -A dearer birth than this his love had brought, -To march in ranks of better equipage: -But since he died and poets better prove, -Theirs for their style I'll read, his for his love.' -Full many a glorious morning have I seen -Flatter the mountain-tops with sovereign eye, -Kissing with golden face the meadows green, -Gilding pale streams with heavenly alchemy; -Anon permit the basest clouds to ride -With ugly rack on his celestial face, -And from the forlorn world his visage hide, -Stealing unseen to west with this disgrace: -Even so my sun one early morn did shine -With all triumphant splendor on my brow; -But out, alack! he was but one hour mine; -The region cloud hath mask'd him from me now. -Yet him for this my love no whit disdaineth; -Suns of the world may stain when heaven's sun staineth. -Why didst thou promise such a beauteous day, -And make me travel forth without my cloak, -To let base clouds o'ertake me in my way, -Hiding thy bravery in their rotten smoke? -'Tis not enough that through the cloud thou break, -To dry the rain on my storm-beaten face, -For no man well of such a salve can speak -That heals the wound and cures not the disgrace: -Nor can thy shame give physic to my grief; -Though thou repent, yet I have still the loss: -The offender's sorrow lends but weak relief -To him that bears the strong offence's cross. -Ah! but those tears are pearl which thy love sheds, -And they are rich and ransom all ill deeds. -No more be grieved at that which thou hast done: -Roses have thorns, and silver fountains mud; -Clouds and eclipses stain both moon and sun, -And loathsome canker lives in sweetest bud. -All men make faults, and even I in this, -Authorizing thy trespass with compare, -Myself corrupting, salving thy amiss, -Excusing thy sins more than thy sins are; -For to thy sensual fault I bring in sense-- -Thy adverse party is thy advocate-- -And 'gainst myself a lawful plea commence: -Such civil war is in my love and hate -That I an accessary needs must be -To that sweet thief which sourly robs from me. -Let me confess that we two must be twain, -Although our undivided loves are one: -So shall those blots that do with me remain -Without thy help by me be borne alone. -In our two loves there is but one respect, -Though in our lives a separable spite, -Which though it alter not love's sole effect, -Yet doth it steal sweet hours from love's delight. 
-I may not evermore acknowledge thee, -Lest my bewailed guilt should do thee shame, -Nor thou with public kindness honour me, -Unless thou take that honour from thy name: -But do not so; I love thee in such sort -As, thou being mine, mine is thy good report. -As a decrepit father takes delight -To see his active child do deeds of youth, -So I, made lame by fortune's dearest spite, -Take all my comfort of thy worth and truth. -For whether beauty, birth, or wealth, or wit, -Or any of these all, or all, or more, -Entitled in thy parts do crowned sit, -I make my love engrafted to this store: -So then I am not lame, poor, nor despised, -Whilst that this shadow doth such substance give -That I in thy abundance am sufficed -And by a part of all thy glory live. -Look, what is best, that best I wish in thee: -This wish I have; then ten times happy me! \ No newline at end of file diff --git a/src/llmperf/__init__.py b/src/llmperf/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/llmperf/__init__.py @@ -0,0 +1 @@ + diff --git a/src/llmperf/common.py b/src/llmperf/common.py new file mode 100644 index 0000000..3efefa1 --- /dev/null +++ b/src/llmperf/common.py @@ -0,0 +1,38 @@ +from typing import List +from llmperf.ray_clients.litellm_client import LiteLLMClient +from llmperf.ray_clients.openai_chat_completions_client import ( + OpenAIChatCompletionsClient, +) +from llmperf.ray_clients.sagemaker_client import SageMakerClient +from llmperf.ray_clients.vertexai_client import VertexAIClient +from llmperf.ray_llm_client import LLMClient + + +SUPPORTED_APIS = ["openai", "anthropic", "litellm"] + + +def construct_clients(llm_api: str, num_clients: int) -> List[LLMClient]: + """Construct LLMClients that will be used to make requests to the LLM API. + + Args: + llm_api: The name of the LLM API to use. + num_clients: The number of concurrent requests to make. 
+
+    Returns:
+        The constructed LLMClients.
+
+    """
+    if llm_api == "openai":
+        clients = [OpenAIChatCompletionsClient.remote() for _ in range(num_clients)]
+    elif llm_api == "sagemaker":
+        clients = [SageMakerClient.remote() for _ in range(num_clients)]
+    elif llm_api == "vertexai":
+        clients = [VertexAIClient.remote() for _ in range(num_clients)]
+    elif llm_api in SUPPORTED_APIS:
+        clients = [LiteLLMClient.remote() for _ in range(num_clients)]
+    else:
+        raise ValueError(
+            f"llm_api must be one of the supported LLM APIs: {SUPPORTED_APIS}"
+        )
+
+    return clients
diff --git a/src/llmperf/common_metrics.py b/src/llmperf/common_metrics.py
new file mode 100644
index 0000000..40e2112
--- /dev/null
+++ b/src/llmperf/common_metrics.py
@@ -0,0 +1,17 @@
+# TODO (Avnishn): compute metrics in class
+INTER_TOKEN_LAT = "inter_token_latency_s"
+TTFT = "ttft_s"
+E2E_LAT = "end_to_end_latency_s"
+NUM_INPUT_TOKENS = "number_input_tokens"
+NUM_OUTPUT_TOKENS = "number_output_tokens"
+NUM_TOTAL_TOKENS = "number_total_tokens"
+REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s"
+ERROR_MSG = "error_msg"
+ERROR_CODE = "error_code"
+ERROR_CODE_FREQ = "error_code_frequency"
+NUM_ERRORS = "number_errors"
+OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s"
+NUM_COMPLETED_REQUESTS = "num_completed_requests"
+COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min"
+ERROR_RATE = "error_rate"
+NUM_REQ_STARTED = "num_requests_started"
diff --git a/src/llmperf/models.py b/src/llmperf/models.py
new file mode 100644
index 0000000..be0d7ea
--- /dev/null
+++ b/src/llmperf/models.py
@@ -0,0 +1,21 @@
+from typing import Any, Dict, List, Optional, Tuple
+from pydantic import BaseModel
+
+
+class RequestConfig(BaseModel):
+    """The configuration for a request to the LLM API.
+
+    Args:
+        model: The model to use.
+        prompt: The prompt to provide to the LLM API, as a (text, token_length) tuple.
+        sampling_params: Additional sampling parameters to send with the request.
+            For more information see the Router app's documentation for the completions endpoint.
+        llm_api: The name of the LLM API to send the request to.
+        metadata: Additional metadata to attach to the request for logging or validation purposes.
+    """
+
+    model: str
+    prompt: Tuple[str, int]
+    sampling_params: Optional[Dict[str, Any]] = None
+    llm_api: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
diff --git a/src/llmperf/ray_clients/__init__.py b/src/llmperf/ray_clients/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/llmperf/ray_clients/litellm_client.py b/src/llmperf/ray_clients/litellm_client.py
new file mode 100644
index 0000000..b99201e
--- /dev/null
+++ b/src/llmperf/ray_clients/litellm_client.py
@@ -0,0 +1,100 @@
+import time
+from typing import Any, Dict
+import ray
+
+from llmperf.ray_llm_client import LLMClient
+from llmperf.models import RequestConfig
+from llmperf import common_metrics
+
+
+@ray.remote
+class LiteLLMClient(LLMClient):
+    """Client for LiteLLM Completions API."""
+
+    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
+        # The litellm package isn't serializable, so we import it within the
+        # function to maintain compatibility with ray.
+        from litellm import completion, validate_environment
+
+        prompt = request_config.prompt
+        prompt, prompt_len = prompt
+
+        message = [
+            {"role": "system", "content": ""},
+            {"role": "user", "content": prompt},
+        ]
+        assert (
+            request_config.llm_api is not None
+        ), "the request config's llm_api must be set."
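+        # LiteLLM routes requests by provider prefix: for any llm_api other than
+        # "litellm" the model is rewritten below as "<provider>/<model>" (for
+        # example, "anthropic/claude-2"), while llm_api="litellm" assumes the
+        # model string is already fully qualified.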
+        if request_config.llm_api == "litellm":
+            model = request_config.model
+        else:
+            model = request_config.llm_api + "/" + request_config.model
+        validation_result = validate_environment(model)
+        if validation_result["missing_keys"]:
+            raise ValueError(
+                f"The following environment vars weren't found but were necessary for "
+                f"the model {request_config.model}: {validation_result['missing_keys']}"
+            )
+        body = {
+            "model": model,
+            "messages": message,
+            "stream": True,
+        }
+        sampling_params = request_config.sampling_params
+        body.update(sampling_params or {})
+
+        time_to_next_token = []
+        tokens_received = 0
+        ttft = 0
+        error_response_code = -1
+        generated_text = ""
+        error_msg = ""
+        output_throughput = 0
+        total_request_time = 0
+
+        metrics = {}
+
+        metrics[common_metrics.ERROR_CODE] = None
+        metrics[common_metrics.ERROR_MSG] = ""
+
+        try:
+            start_time = time.monotonic()
+            most_recent_received_token_time = time.monotonic()
+
+            response = completion(**body)
+            ttft = 0
+            for tok in response:
+                if tok.choices[0].delta:
+                    delta = tok.choices[0].delta
+                    if delta.get("content", None):
+                        if ttft == 0:
+                            ttft = time.monotonic() - start_time
+                            time_to_next_token.append(ttft)
+                        else:
+                            time_to_next_token.append(
+                                time.monotonic() - most_recent_received_token_time
+                            )
+                        generated_text += delta["content"]
+                        most_recent_received_token_time = time.monotonic()
+                        tokens_received += 1
+
+            total_request_time = time.monotonic() - start_time
+
+            output_throughput = tokens_received / total_request_time
+
+        except Exception as e:
+            error_msg = str(e)
+            metrics[common_metrics.ERROR_MSG] = error_msg
+            metrics[common_metrics.ERROR_CODE] = error_response_code
+
+            print(f"Warning Or Error: {e}")
+            print(error_response_code)
+
+        metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token)
+        metrics[common_metrics.TTFT] = ttft
+        metrics[common_metrics.E2E_LAT] = total_request_time
+        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
+        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
+        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
+        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len
+        return metrics, generated_text, request_config
diff --git a/src/llmperf/ray_clients/openai_chat_completions_client.py b/src/llmperf/ray_clients/openai_chat_completions_client.py
new file mode 100644
index 0000000..f2e0a91
--- /dev/null
+++ b/src/llmperf/ray_clients/openai_chat_completions_client.py
@@ -0,0 +1,120 @@
+import json
+import os
+import time
+from typing import Any, Dict
+
+import ray
+import requests
+
+from llmperf.ray_llm_client import LLMClient
+from llmperf.models import RequestConfig
+from llmperf import common_metrics
+
+
+@ray.remote
+class OpenAIChatCompletionsClient(LLMClient):
+    """Client for OpenAI Chat Completions API."""
+
+    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
+        prompt = request_config.prompt
+        prompt, prompt_len = prompt
+
+        message = [
+            {"role": "system", "content": ""},
+            {"role": "user", "content": prompt},
+        ]
+        model = request_config.model
+        body = {
+            "model": model,
+            "messages": message,
+            "stream": True,
+        }
+        sampling_params = request_config.sampling_params
+        body.update(sampling_params or {})
+        time_to_next_token = []
+        tokens_received = 0
+        ttft = 0
+        error_response_code = -1
+        generated_text = ""
+        error_msg = ""
+        output_throughput = 0
+        total_request_time = 0
+
+        metrics = {}
+
+        metrics[common_metrics.ERROR_CODE] = None
+        metrics[common_metrics.ERROR_MSG] = ""
+
+        start_time = time.monotonic()
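+        # All timings are wall-clock values from time.monotonic(): TTFT is
+        # measured from start_time, and each inter-token gap is measured from
+        # the previous token's arrival.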
+        most_recent_received_token_time = time.monotonic()
+        address = os.environ.get("OPENAI_API_BASE")
+        if not address:
+            raise ValueError("the environment variable OPENAI_API_BASE must be set.")
+        key = os.environ.get("OPENAI_API_KEY")
+        if not key:
+            raise ValueError("the environment variable OPENAI_API_KEY must be set.")
+        headers = {"Authorization": f"Bearer {key}"}
+        if not address.endswith("/"):
+            address = address + "/"
+        address += "chat/completions"
+        try:
+            with requests.post(
+                address,
+                json=body,
+                stream=True,
+                timeout=180,
+                headers=headers,
+            ) as response:
+                if response.status_code != 200:
+                    error_msg = response.text
+                    error_response_code = response.status_code
+                    response.raise_for_status()
+                for chunk in response.iter_lines(chunk_size=None):
+                    chunk = chunk.strip()
+
+                    if not chunk:
+                        continue
+                    stem = "data: "
+                    chunk = chunk[len(stem) :]
+                    if chunk == b"[DONE]":
+                        continue
+                    tokens_received += 1
+                    data = json.loads(chunk)
+
+                    if "error" in data:
+                        error_msg = data["error"]["message"]
+                        error_response_code = data["error"]["code"]
+                        raise RuntimeError(data["error"]["message"])
+
+                    delta = data["choices"][0]["delta"]
+                    if delta.get("content", None):
+                        if not ttft:
+                            ttft = time.monotonic() - start_time
+                            time_to_next_token.append(ttft)
+                        else:
+                            time_to_next_token.append(
+                                time.monotonic() - most_recent_received_token_time
+                            )
+                        most_recent_received_token_time = time.monotonic()
+                        generated_text += delta["content"]
+
+            total_request_time = time.monotonic() - start_time
+            output_throughput = tokens_received / total_request_time
+
+        except Exception as e:
+            metrics[common_metrics.ERROR_MSG] = error_msg
+            metrics[common_metrics.ERROR_CODE] = error_response_code
+            print(f"Warning Or Error: {e}")
+            print(error_response_code)
+
+        # This should be the same as metrics[common_metrics.E2E_LAT]; leave it here for now.
+        metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token)
+        metrics[common_metrics.TTFT] = ttft
+        metrics[common_metrics.E2E_LAT] = total_request_time
+        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
+        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
+        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
+        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len
+
+        return metrics, generated_text, request_config
diff --git a/src/llmperf/ray_clients/sagemaker_client.py b/src/llmperf/ray_clients/sagemaker_client.py
new file mode 100644
index 0000000..ce15964
--- /dev/null
+++ b/src/llmperf/ray_clients/sagemaker_client.py
@@ -0,0 +1,158 @@
+import io
+import json
+import os
+import time
+from typing import Any, Dict
+
+import boto3
+import ray
+from transformers import LlamaTokenizerFast
+
+from llmperf.ray_llm_client import LLMClient
+from llmperf.models import RequestConfig
+from llmperf import common_metrics
+
+
+@ray.remote
+class SageMakerClient(LLMClient):
+    """Client for SageMaker LLM endpoints."""
+
+    def __init__(self):
+        # SageMaker doesn't return the number of tokens that are generated so we approximate it by
+        # using the llama tokenizer.
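+        # The tokenizer is only used to count generated tokens, so it does not
+        # need to match the deployed model's own tokenizer exactly.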
+ self.tokenizer = LlamaTokenizerFast.from_pretrained( + "hf-internal-testing/llama-tokenizer" + ) + + def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: + if not os.environ.get("AWS_ACCESS_KEY_ID"): + raise ValueError("AWS_ACCESS_KEY_ID must be set.") + if not os.environ.get("AWS_SECRET_ACCESS_KEY"): + raise ValueError("AWS_SECRET_ACCESS_KEY must be set.") + if not os.environ.get("AWS_REGION_NAME"): + raise ValueError("AWS_REGION_NAME must be set.") + + prompt = request_config.prompt + prompt, prompt_len = prompt + + message = [ + {"role": "system", "content": ""}, + {"role": "user", "content": prompt}, + ] + model = request_config.model + sm_runtime = boto3.client( + "sagemaker-runtime", region_name=os.environ.get("AWS_REGION_NAME") + ) + + sampling_params = request_config.sampling_params + + if "max_tokens" in sampling_params: + sampling_params["max_new_tokens"] = sampling_params["max_tokens"] + del sampling_params["max_tokens"] + + message = { + "inputs": [ + [ + {"role": "system", "content": ""}, + {"role": "user", "content": prompt}, + ] + ], + "parameters": { + **request_config.sampling_params, + }, + } + + time_to_next_token = [] + tokens_received = 0 + ttft = 0 + error_response_code = None + generated_text = "" + error_msg = "" + output_throughput = 0 + total_request_time = 0 + metrics = {} + + start_time = time.monotonic() + most_recent_received_token_time = time.monotonic() + + try: + response = sm_runtime.invoke_endpoint_with_response_stream( + EndpointName=model, + ContentType="application/json", + Body=json.dumps(message), + CustomAttributes="accept_eula=true", + ) + + event_stream = response["Body"] + json_byte = b"" + for line, ttft, _ in LineIterator(event_stream): + json_byte += line + time_to_next_token.append( + time.monotonic() - most_recent_received_token_time + ) + most_recent_received_token_time = time.monotonic() + ttft = ttft - start_time + resp = json.loads(json_byte) + total_request_time = time.monotonic() - start_time + generated_text = resp[0]["generation"]["content"] + tokens_received = len(self.tokenizer.encode(generated_text)) + output_throughput = tokens_received / total_request_time + + except Exception as e: + print(f"Warning Or Error: {e}") + print(error_response_code) + error_msg = str(e) + error_response_code = 500 + + metrics[common_metrics.ERROR_MSG] = error_msg + metrics[common_metrics.ERROR_CODE] = error_response_code + metrics[common_metrics.INTER_TOKEN_LAT] = time_to_next_token + metrics[common_metrics.TTFT] = ttft + metrics[common_metrics.E2E_LAT] = total_request_time + metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput + metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len + metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received + metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len + + return metrics, generated_text, request_config + + +class LineIterator: + """ + A helper class for parsing the byte stream input. 
+    Reference: https://aws.amazon.com/blogs/machine-learning/elevating-the-generative-ai-experience-introducing-streaming-support-in-amazon-sagemaker-hosting/
+    """
+
+    def __init__(self, stream):
+        self.byte_iterator = iter(stream)
+        self.buffer = io.BytesIO()
+        self.read_pos = 0
+        self.ttft = 0
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        while True:
+            self.buffer.seek(self.read_pos)
+            line = self.buffer.readline()
+            if line and line[-1] == ord("\n"):
+                if self.ttft == 0:
+                    self.ttft = time.monotonic()
+                self.read_pos += len(line)
+                return line[:-1], self.ttft, time.monotonic()
+            # Handle the trailing ']' of the chat output, which arrives without a newline.
+            if line and self.read_pos == self.buffer.getbuffer().nbytes - 1:
+                self.read_pos += 1
+                return line, self.ttft, time.monotonic()
+            try:
+                chunk = next(self.byte_iterator)
+            except StopIteration:
+                if self.read_pos < self.buffer.getbuffer().nbytes:
+                    continue
+                raise
+            if "PayloadPart" not in chunk:
+                print(f"Unknown event type: {chunk}")
+                continue
+            self.buffer.seek(0, io.SEEK_END)
+            self.buffer.write(chunk["PayloadPart"]["Bytes"])
diff --git a/src/llmperf/ray_clients/vertexai_client.py b/src/llmperf/ray_clients/vertexai_client.py
new file mode 100644
index 0000000..4e2cf12
--- /dev/null
+++ b/src/llmperf/ray_clients/vertexai_client.py
@@ -0,0 +1,135 @@
+import json
+import os
+import time
+from typing import Any, Dict
+
+import ray
+import requests
+from transformers import LlamaTokenizerFast
+
+from llmperf.ray_llm_client import LLMClient
+from llmperf.models import RequestConfig
+from llmperf import common_metrics
+
+
+@ray.remote
+class VertexAIClient(LLMClient):
+    """Client for VertexAI API."""
+
+    def __init__(self):
+        # VertexAI doesn't return the number of tokens that are generated so we approximate it by
+        # using the llama tokenizer.
+        self.tokenizer = LlamaTokenizerFast.from_pretrained(
+            "hf-internal-testing/llama-tokenizer"
+        )
+
+    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
+        project_id = os.environ.get("GCLOUD_PROJECT_ID")
+        region = os.environ.get("GCLOUD_REGION")
+        endpoint_id = os.environ.get("VERTEXAI_ENDPOINT_ID")
+        access_token = os.environ.get("GCLOUD_ACCESS_TOKEN", "").strip()
+        if not project_id:
+            raise ValueError("the environment variable GCLOUD_PROJECT_ID must be set.")
+        if not region:
+            raise ValueError("the environment variable GCLOUD_REGION must be set.")
+        if not endpoint_id:
+            raise ValueError(
+                "the environment variable VERTEXAI_ENDPOINT_ID must be set."
+            )
+        if not access_token:
+            raise ValueError(
+                "the environment variable GCLOUD_ACCESS_TOKEN must be set."
+            )
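+        # The :predict endpoint is not streamed, so the per-token timings below
+        # are reconstructed from the total request time after the response
+        # arrives (and TTFT is reported as -1) rather than measured per token.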
+        prompt = request_config.prompt
+        prompt, prompt_len = prompt
+
+        time_to_next_token = []
+        tokens_received = 0
+        ttft = 0
+        response_code = None
+        generated_text = ""
+        output_throughput = 0
+        total_request_time = 0
+
+        metrics = {}
+
+        metrics[common_metrics.ERROR_CODE] = None
+        metrics[common_metrics.ERROR_MSG] = ""
+
+        try:
+            # Define the URL for the request
+            url = (
+                f"https://{region}-aiplatform.googleapis.com/v1/projects/"
+                f"{project_id}/locations/{region}/endpoints/{endpoint_id}:predict"
+            )
+
+            # Define the headers
+            headers = {
+                "Authorization": f"Bearer {access_token}",
+                "Content-Type": "application/json",
+            }
+
+            sampling_params = request_config.sampling_params or {}
+            if "max_new_tokens" in sampling_params:
+                sampling_params["maxOutputTokens"] = sampling_params.pop(
+                    "max_new_tokens"
+                )
+
+            # Define the data payload
+            data = {"instances": [{"prompt": prompt}], "parameters": sampling_params}
+
+            # Make the POST request
+            start_time = time.monotonic()
+            response = requests.post(url, headers=headers, data=json.dumps(data))
+            total_request_time = time.monotonic() - start_time
+            response_code = response.status_code
+            response.raise_for_status()
+            # output from the endpoint is in the form:
+            # {"predictions": ["Input: ... \nOutput:\n ..."]}
+            generated_text = response.json()["predictions"][0].split("\nOutput:\n")[1]
+            tokens_received = len(self.tokenizer.encode(generated_text))
+            ttft = -1
+            output_throughput = tokens_received / total_request_time
+            time_to_next_token = [
+                total_request_time / tokens_received for _ in range(tokens_received)
+            ]
+
+        except Exception as e:
+            metrics[common_metrics.ERROR_MSG] = str(e)
+            metrics[common_metrics.ERROR_CODE] = response_code
+            print(f"Warning Or Error: {e}")
+            print(response_code)
+
+        metrics[common_metrics.INTER_TOKEN_LAT] = time_to_next_token
+        metrics[common_metrics.TTFT] = ttft
+        metrics[common_metrics.E2E_LAT] = total_request_time
+        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
+        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
+        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
+        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len
+
+        return metrics, generated_text, request_config
+
+
+if __name__ == "__main__":
+    # Run these beforehand:
+
+    # gcloud auth application-default login
+    # gcloud config set project YOUR_PROJECT_ID
+    # export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token)
+    # export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID
+    # export GCLOUD_REGION=YOUR_REGION
+    # export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID
+
+    client = VertexAIClient.remote()
+    request_config = RequestConfig(
+        prompt=("Give me ten interview questions for the role of program manager.", 10),
+        model="gpt3",
+        sampling_params={
+            "temperature": 0.2,
+            "max_new_tokens": 256,
+            "top_k": 40,
+            "top_p": 0.95,
+        },
+    )
+    ray.get(client.llm_request.remote(request_config))
diff --git a/src/llmperf/ray_llm_client.py b/src/llmperf/ray_llm_client.py
new file mode 100644
index 0000000..cc31639
--- /dev/null
+++ b/src/llmperf/ray_llm_client.py
@@ -0,0 +1,22 @@
+import abc
+from typing import Any, Dict, Tuple
+
+from llmperf.models import RequestConfig
+
+
+class LLMClient:
+    """A client for making requests to an LLM API, e.g. Anyscale Endpoints."""
+
+    @abc.abstractmethod
+    def llm_request(
+        self, request_config: RequestConfig
+    ) -> Tuple[Dict[str, Any], str, RequestConfig]:
+        """Make a single completion request to an LLM API.
+
+        Returns:
+            Metrics about the performance characteristics of the request.
+ The text generated by the request to the LLM API. + The request_config used to make the request. This is mainly for logging purposes. + + """ + ... diff --git a/src/llmperf/requests_launcher.py b/src/llmperf/requests_launcher.py new file mode 100644 index 0000000..d5a12ce --- /dev/null +++ b/src/llmperf/requests_launcher.py @@ -0,0 +1,48 @@ +from typing import Any, List + +from llmperf.ray_llm_client import LLMClient +from llmperf.models import RequestConfig +from ray.util import ActorPool + + +class RequestsLauncher: + """Launch requests from LLMClients to their respective LLM APIs.""" + + def __init__(self, llm_clients: List[LLMClient]): + self._llm_client_pool = ActorPool(llm_clients) + + def launch_requests(self, request_config: RequestConfig) -> None: + """Launch requests to the LLM API. + + Args: + request_config: The configuration for the request. + + """ + if self._llm_client_pool.has_free(): + self._llm_client_pool.submit( + lambda client, _request_config: client.llm_request.remote( + _request_config + ), + request_config, + ) + + def get_next_ready(self, block: bool = False) -> List[Any]: + """Return results that are ready from completed requests. + + Args: + block: Whether to block until a result is ready. + + Returns: + A list of results that are ready. + + """ + results = [] + if not block: + while self._llm_client_pool.has_next(): + results.append(self._llm_client_pool.get_next_unordered()) + else: + while not self._llm_client_pool.has_next(): + pass + while self._llm_client_pool.has_next(): + results.append(self._llm_client_pool.get_next_unordered()) + return results diff --git a/src/llmperf/sonnet.txt b/src/llmperf/sonnet.txt new file mode 100644 index 0000000..9f13ead --- /dev/null +++ b/src/llmperf/sonnet.txt @@ -0,0 +1,84 @@ +Shall I compare thee to a summer's day? +Thou art more lovely and more temperate: +Rough winds do shake the darling buds of May, +And summer's lease hath all too short a date: +Sometime too hot the eye of heaven shines, +And often is his gold complexion dimm'd; +And every fair from fair sometime declines, +By chance or nature's changing course untrimm'd; +But thy eternal summer shall not fade +Nor lose possession of that fair thou owest; +Nor shall Death brag thou wander'st in his shade, +When in eternal lines to time thou growest: +So long as men can breathe or eyes can see, +So long lives this and this gives life to thee. +Then let not winter's ragged hand deface +In thee thy summer, ere thou be distill'd: +Make sweet some vial; treasure thou some place +With beauty's treasure, ere it be self-kill'd. +That use is not forbidden usury, +Which happies those that pay the willing loan; +That's for thyself to breed another thee, +Or ten times happier, be it ten for one; +Ten times thyself were happier than thou art, +If ten of thine ten times refigured thee: +Then what could death do, if thou shouldst depart, +Leaving thee living in posterity? +Be not self-will'd, for thou art much too fair +To be death's conquest and make worms thine heir. +Where art thou, Muse, that thou forget'st so long +To speak of that which gives thee all thy might? +Spend'st thou thy fury on some worthless song, +Darkening thy power to lend base subjects light? +Return, forgetful Muse, and straight redeem +In gentle numbers time so idly spent; +Sing to the ear that doth thy lays esteem +And gives thy pen both skill and argument. 
+Rise, resty Muse, my love's sweet face survey, +If Time have any wrinkle graven there; +If any, be a satire to decay, +And make Time's spoils despised every where. +Give my love fame faster than Time wastes life; +So thou prevent'st his scythe and crooked knife. +My glass shall not persuade me I am old, +So long as youth and thou are of one date; +But when in thee time's furrows I behold, +Then look I death my days should expiate. +For all that beauty that doth cover thee +Is but the seemly raiment of my heart, +Which in thy breast doth live, as thine in me: +How can I then be elder than thou art? +O, therefore, love, be of thyself so wary +As I, not for myself, but for thee will; +Bearing thy heart, which I will keep so chary +As tender nurse her babe from faring ill. +Presume not on thy heart when mine is slain; +Thou gavest me thine, not to give back again. +So am I as the rich, whose blessed key +Can bring him to his sweet up-locked treasure, +The which he will not every hour survey, +For blunting the fine point of seldom pleasure. +Therefore are feasts so solemn and so rare, +Since, seldom coming, in the long year set, +Like stones of worth they thinly placed are, +Or captain jewels in the carcanet. +So is the time that keeps you as my chest, +Or as the wardrobe which the robe doth hide, +To make some special instant special blest, +By new unfolding his imprison'd pride. +Blessed are you, whose worthiness gives scope, +Being had, to triumph, being lack'd, to hope. +If there be nothing new, but that which is +Hath been before, how are our brains beguiled, +Which, labouring for invention, bear amiss +The second burden of a former child! +O, that record could with a backward look, +Even of five hundred courses of the sun, +Show me your image in some antique book, +Since mind at first in character was done! +That I might see what the old world could say +To this composed wonder of your frame; +Whether we are mended, or whether better they, +Or whether revolution be the same. +O, sure I am, the wits of former days +To subjects worse have given admiring praise. \ No newline at end of file diff --git a/src/llmperf/utils.py b/src/llmperf/utils.py new file mode 100644 index 0000000..4e3b2e9 --- /dev/null +++ b/src/llmperf/utils.py @@ -0,0 +1,147 @@ +import json +import math +import pathlib +import random +import subprocess +import time +from typing import Any, Dict, Tuple + +from transformers import LlamaTokenizerFast + + +RESULTS_VERSION = "2023-08-31" + + +class LLMPerfResults: + def __init__( + self, + name: str, + metadata: Dict[str, Any] = None, + ): + self.name = name + self.metadata = metadata or {} + self.timestamp = int(time.time()) + self.metadata["timestamp"] = self.timestamp + self.version = RESULTS_VERSION + + def to_dict(self): + data = { + "version": self.version, + "name": self.name, + } + data.update(self.metadata) + data = flatten_dict(data) + return data + + def json(self): + data = self.to_dict() + return json.dumps(data) + + +def upload_to_s3(results_path: str, s3_path: str) -> None: + """Upload the results to s3. + + Args: + results_path: The path to the results file. + s3_path: The s3 path to upload the results to. 
+
+    """
+
+    command = ["aws", "s3", "sync", results_path, f"{s3_path}/"]
+    result = subprocess.run(command, capture_output=True, text=True)
+    if result.returncode == 0:
+        print("Files uploaded successfully!")
+    else:
+        print("An error occurred:")
+        print(result.stderr)
+
+
+def randomly_sample_sonnet_lines_prompt(
+    prompt_tokens_mean: int = 550,
+    prompt_tokens_stddev: int = 250,
+    expect_output_tokens: int = 150,
+) -> Tuple[str, int]:
+    """Generate a prompt that randomly samples lines from the Shakespeare sonnets in sonnet.txt.
+
+    Args:
+        prompt_tokens_mean: The mean token length of the prompt to generate.
+        prompt_tokens_stddev: The standard deviation of the token length of the prompt to generate.
+        expect_output_tokens: The number of tokens to expect in the output. This is used to
+            determine the length of the prompt. The prompt will be generated such that the output
+            will be approximately this many tokens.
+
+    Note:
+        Tokens are counted from the sonnet using the Llama tokenizer. Using one tokenizer
+        ensures a fairer comparison across different LLMs. For example, if GPT-3.5 tokenizes
+        a prompt into fewer tokens than Llama 2, this will be reflected in the results, since
+        both models are fed identical prompts.
+
+    Returns:
+        A tuple of the prompt and the length of the prompt.
+    """
+
+    tokenizer = LlamaTokenizerFast.from_pretrained(
+        "hf-internal-testing/llama-tokenizer"
+    )
+
+    get_token_length = lambda text: len(tokenizer.encode(text))
+
+    prompt = (
+        "Randomly stream lines from the following text "
+        f"with {expect_output_tokens} output tokens. "
+        "Don't generate eos tokens:\n\n"
+    )
+    # Sample a prompt length that is at least as long as the base prompt.
+    num_prompt_tokens = sample_random_positive_int(
+        prompt_tokens_mean, prompt_tokens_stddev
+    )
+    while num_prompt_tokens < get_token_length(prompt):
+        num_prompt_tokens = sample_random_positive_int(
+            prompt_tokens_mean, prompt_tokens_stddev
+        )
+    remaining_prompt_tokens = num_prompt_tokens - get_token_length(prompt)
+    sonnet_path = pathlib.Path(__file__).parent.resolve() / "sonnet.txt"
+    with open(sonnet_path, "r") as f:
+        sonnet_lines = f.readlines()
+    random.shuffle(sonnet_lines)
+    sampling_lines = True
+    while sampling_lines:
+        for line in sonnet_lines:
+            line_to_add = line
+            if remaining_prompt_tokens - get_token_length(line_to_add) < 0:
+                # This will cut off a line in the middle of a word, but that's OK since an
+                # LLM should be able to handle that.
+                line_to_add = line_to_add[: int(math.ceil(remaining_prompt_tokens))]
+                sampling_lines = False
+                prompt += line_to_add
+                break
+            prompt += line_to_add
+            remaining_prompt_tokens -= get_token_length(line_to_add)
+    return (prompt, num_prompt_tokens)
+
+
+def sample_random_positive_int(mean: int, stddev: int) -> int:
+    """Sample random numbers from a Gaussian distribution until a positive number is sampled.
+
+    Args:
+        mean: The mean of the Gaussian distribution to sample from.
+        stddev: The standard deviation of the Gaussian distribution to sample from.
+
+    Returns:
+        A random positive integer sampled from the Gaussian distribution.
+ """ + ret = -1 + while ret <= 0: + ret = int(random.gauss(mean, stddev)) + return ret + + +def flatten_dict(d, parent_key="", sep="_"): + items = [] + for k, v in d.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + if isinstance(v, dict): + items.extend(flatten_dict(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py new file mode 100644 index 0000000..a078e35 --- /dev/null +++ b/token_benchmark_ray.py @@ -0,0 +1,464 @@ +import argparse +from collections.abc import Iterable +import json +import os +from pathlib import Path +import re +import time +from typing import Any, Dict, List, Optional, Tuple + +import pandas as pd +import ray + +from llmperf import common_metrics +from llmperf.common import SUPPORTED_APIS, construct_clients + +from llmperf.models import RequestConfig +from llmperf.requests_launcher import RequestsLauncher +from llmperf.utils import ( + randomly_sample_sonnet_lines_prompt, + LLMPerfResults, + sample_random_positive_int, +) +from tqdm import tqdm + +from transformers import LlamaTokenizerFast + +def get_token_throughput_latencies( + model: str, + mean_input_tokens: int, + stddev_input_tokens: int, + mean_output_tokens: int, + stddev_output_tokens: int, + additional_sampling_params: Optional[Dict[str, Any]] = None, + num_concurrent_requests: int = 1, + max_num_completed_requests: int = 500, + test_timeout_s=90, + llm_api="openai", +) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: + """Get the token throughput and latencies for the given model. + + Args: + model: The name of the model to query. + mean_input_tokens: The mean number of tokens to send in the prompt for the request. + stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. + mean_output_tokens: The mean number of tokens to generate per request. + stddev_output_tokens: The standard deviation of the number of tokens to generate per request. + additional_sampling_params: Additional sampling parameters to send with the request. + For more information see the LLM APIs documentation for the completions + num_concurrent_requests: The number of concurrent requests to make. Increase + this to increase the amount of load and vice versa. + test_timeout_s: The amount of time to run the test for before reporting results. + llm_api: The name of the llm api to use. Either "openai" or "litellm". + + Returns: + A summary of the performance metrics collected across all completed requests + (e.g. throughput, latencies, etc.) + The individual metrics for each request. 
+ """ + tokenizer = LlamaTokenizerFast.from_pretrained( + "hf-internal-testing/llama-tokenizer" + ) + get_token_length = lambda text: len(tokenizer.encode(text)) + + if not additional_sampling_params: + additional_sampling_params = {} + + clients = construct_clients(llm_api=llm_api, num_clients=num_concurrent_requests) + req_launcher = RequestsLauncher(clients) + completed_requests = [] + num_completed_requests = 0 + start_time = time.monotonic() + iter = 0 + pbar = tqdm(total=max_num_completed_requests) + while ( + time.monotonic() - start_time < test_timeout_s + and len(completed_requests) < max_num_completed_requests + ): + iter += 1 + num_output_tokens = sample_random_positive_int( + mean_output_tokens, stddev_output_tokens + ) + + prompt = randomly_sample_sonnet_lines_prompt( + prompt_tokens_mean=mean_input_tokens, + prompt_tokens_stddev=stddev_input_tokens, + expect_output_tokens=num_output_tokens, + ) + + default_sampling_params = {"max_tokens": num_output_tokens} + default_sampling_params.update(additional_sampling_params) + request_config = RequestConfig( + model=model, + prompt=prompt, + sampling_params=default_sampling_params, + llm_api=llm_api, + ) + req_launcher.launch_requests(request_config) + # Retrieving results less frequently allows for more concurrent requests + # to be launched. This will overall reduce the amount of time it takes + # for the test to run. + if not (iter % num_concurrent_requests): + outs = req_launcher.get_next_ready() + all_metrics = [] + for out in outs: + request_metrics, gen_text, _ = out + num_output_tokens = get_token_length(gen_text) + if num_output_tokens: + request_metrics[common_metrics.INTER_TOKEN_LAT] /= num_output_tokens + else: + request_metrics[common_metrics.INTER_TOKEN_LAT] = 0 + request_metrics[common_metrics.NUM_OUTPUT_TOKENS] = num_output_tokens + request_metrics[common_metrics.NUM_TOTAL_TOKENS] = request_metrics[common_metrics.NUM_INPUT_TOKENS] + num_output_tokens + all_metrics.append(request_metrics) + completed_requests.extend(all_metrics) + pbar.update(len(completed_requests) - num_completed_requests) + num_completed_requests = len(completed_requests) + + pbar.close() + end_time = time.monotonic() + if end_time - start_time >= test_timeout_s: + print("Test timed out before all requests could be completed.") + + # check one last time that there are no remaining results to collect. 
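+    # Requests launched shortly before the timeout may still be in flight; this
+    # final non-blocking drain only collects results that have already finished.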
+    outs = req_launcher.get_next_ready()
+    all_metrics = []
+    for out in outs:
+        request_metrics, gen_text, _ = out
+        num_output_tokens = get_token_length(gen_text)
+        if num_output_tokens:
+            request_metrics[common_metrics.INTER_TOKEN_LAT] /= num_output_tokens
+        else:
+            request_metrics[common_metrics.INTER_TOKEN_LAT] = 0
+        request_metrics[common_metrics.NUM_OUTPUT_TOKENS] = num_output_tokens
+        request_metrics[common_metrics.NUM_TOTAL_TOKENS] = (
+            request_metrics[common_metrics.NUM_INPUT_TOKENS] + num_output_tokens
+        )
+
+        all_metrics.append(request_metrics)
+    completed_requests.extend(all_metrics)
+
+    print(f"\nResults for token benchmark for {model} queried with the {llm_api} api.\n")
+    ret = metrics_summary(completed_requests, start_time, end_time)
+
+    metadata = {
+        "model": model,
+        "mean_input_tokens": mean_input_tokens,
+        "stddev_input_tokens": stddev_input_tokens,
+        "mean_output_tokens": mean_output_tokens,
+        "stddev_output_tokens": stddev_output_tokens,
+        "num_concurrent_requests": num_concurrent_requests,
+        "additional_sampling_params": additional_sampling_params,
+    }
+
+    metadata["results"] = ret
+
+    return metadata, completed_requests
+
+
+def metrics_summary(
+    metrics: List[Dict[str, Any]], start_time: int, end_time: int
+) -> Dict[str, Any]:
+    """Generate a summary over metrics generated from potentially multiple instances of this client.
+
+    Args:
+        metrics: The metrics to summarize.
+        start_time: The time the test started.
+        end_time: The time the test ended.
+
+    Returns:
+        A summary with the following information:
+            - Overall throughput (generated tokens / total test time)
+            - Number of completed requests
+            - Error rate
+            - Error code frequency
+            - Quantiles (p25-p99) for the following metrics:
+                - Inter token latency
+                - Time to first token
+                - User total request time
+                - Number of tokens processed per request
+                - Number of tokens generated per request
+                - User throughput (tokens / s)
+    """
+    ret = {}
+
+    def flatten(item):
+        for sub_item in item:
+            if isinstance(sub_item, Iterable) and not isinstance(sub_item, str):
+                yield from flatten(sub_item)
+            else:
+                yield sub_item
+
+    df = pd.DataFrame(metrics)
+    df_without_errored_req = df[df[common_metrics.ERROR_CODE].isna()]
+
+    for key in [
+        common_metrics.INTER_TOKEN_LAT,
+        common_metrics.TTFT,
+        common_metrics.E2E_LAT,
+        common_metrics.REQ_OUTPUT_THROUGHPUT,
+        common_metrics.NUM_INPUT_TOKENS,
+        common_metrics.NUM_OUTPUT_TOKENS,
+    ]:
+        print(key)
+        ret[key] = {}
+        series = pd.Series(list(flatten(df_without_errored_req[key]))).dropna()
+        quantiles = series.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict()
+        quantiles_reformatted_keys = {}
+        for quantile, value in quantiles.items():
+            reformatted_key = f"p{int(quantile * 100)}"
+            print(f"    {reformatted_key} = {value}")
+            quantiles_reformatted_keys[reformatted_key] = value
+        ret[key]["quantiles"] = quantiles_reformatted_keys
+        mean = series.mean()
+        print(f"    mean = {mean}")
+        ret[key]["mean"] = mean
+        print(f"    min = {series.min()}")
+        ret[key]["min"] = series.min()
+        print(f"    max = {series.max()}")
+        ret[key]["max"] = series.max()
+        print(f"    stddev = {series.std()}")
+        ret[key]["stddev"] = series.std()
+
+    ret[common_metrics.NUM_REQ_STARTED] = len(metrics)
+
+    error_codes = df[common_metrics.ERROR_CODE].dropna()
+    num_errors = len(error_codes)
+    ret[common_metrics.ERROR_RATE] = num_errors / len(metrics) if len(metrics) else 0
+    ret[common_metrics.NUM_ERRORS] = num_errors
+    print(f"Number Of Errored Requests: {num_errors}")
+    error_code_frequency = dict(error_codes.value_counts())
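+    # error_code_frequency maps each distinct error code to its count, e.g.
+    # {500: 3, 429: 1}; it is stored as a string below so the summary remains
+    # JSON-serializable even with non-string keys.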
+    if num_errors:
+        print("Error Code Frequency")
+        print(error_code_frequency)
+    ret[common_metrics.ERROR_CODE_FREQ] = str(error_code_frequency)
+
+    overall_output_throughput = df_without_errored_req[
+        common_metrics.NUM_OUTPUT_TOKENS
+    ].sum() / (end_time - start_time)
+
+    print(f"Overall Output Throughput: {overall_output_throughput}")
+    ret[common_metrics.OUTPUT_THROUGHPUT] = overall_output_throughput
+
+    num_completed_requests = len(df_without_errored_req)
+    num_completed_requests_per_min = (
+        num_completed_requests / (end_time - start_time) * 60
+    )
+    print(f"Number Of Completed Requests: {num_completed_requests}")
+    print(f"Completed Requests Per Minute: {num_completed_requests_per_min}")
+
+    ret[common_metrics.NUM_COMPLETED_REQUESTS] = num_completed_requests
+    ret[common_metrics.COMPLETED_REQUESTS_PER_MIN] = num_completed_requests_per_min
+
+    return ret
+
+
+def run_token_benchmark(
+    llm_api: str,
+    model: str,
+    test_timeout_s: int,
+    max_num_completed_requests: int,
+    num_concurrent_requests: int,
+    mean_input_tokens: int,
+    stddev_input_tokens: int,
+    mean_output_tokens: int,
+    stddev_output_tokens: int,
+    additional_sampling_params: str,
+    results_dir: str,
+    user_metadata: Dict[str, Any],
+):
+    """
+    Args:
+        llm_api: The name of the llm api to use.
+        model: The name of the model to query.
+        max_num_completed_requests: The number of requests to complete before finishing the test.
+        test_timeout_s: The amount of time to run the test for before reporting results.
+        num_concurrent_requests: The number of concurrent requests to make. Increase
+            this to increase the amount of load and vice versa.
+        mean_input_tokens: The mean number of tokens to send in the prompt for the request.
+        stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request.
+        mean_output_tokens: The mean number of tokens to generate per request.
+        stddev_output_tokens: The standard deviation of the number of tokens to generate per request.
+        additional_sampling_params: Additional sampling parameters to send with the request.
+            For more information see the LLM APIs documentation for the completions endpoint.
+        results_dir: The directory to save the results to.
+        user_metadata: Additional metadata to include in the results.
+    """
+    if mean_input_tokens < 40:
+        print(
+            "The minimum number of input tokens that will be sent is 41"
+            " because of the current prompting logic."
+        )
+
+    summary, individual_responses = get_token_throughput_latencies(
+        model=model,
+        llm_api=llm_api,
+        test_timeout_s=test_timeout_s,
+        max_num_completed_requests=max_num_completed_requests,
+        mean_input_tokens=mean_input_tokens,
+        stddev_input_tokens=stddev_input_tokens,
+        mean_output_tokens=mean_output_tokens,
+        stddev_output_tokens=stddev_output_tokens,
+        num_concurrent_requests=num_concurrent_requests,
+        additional_sampling_params=json.loads(additional_sampling_params),
+    )
+
+    if results_dir:
+        filename = f"{model}_{mean_input_tokens}_{mean_output_tokens}"
+        filename = re.sub(r"[^\w\d-]+", "-", filename)
+        filename = re.sub(r"-{2,}", "-", filename)
+        summary_filename = f"{filename}_summary"
+        individual_responses_filename = f"{filename}_individual_responses"
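+        # For example, a (hypothetical) model id "meta-llama/Llama-2-7b" with the
+        # default token counts yields "meta-llama-Llama-2-7b_550_150_summary":
+        # the regexes replace runs of non-word characters with "-" and collapse
+        # repeated dashes.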
+
+        # Fold the user-supplied metadata into the summary before saving.
+        summary.update(user_metadata)
+
+        results = LLMPerfResults(name=summary_filename, metadata=summary)
+        results_dir = Path(results_dir)
+        if not results_dir.exists():
+            results_dir.mkdir(parents=True)
+        elif not results_dir.is_dir():
+            raise ValueError(f"{results_dir} is not a directory")
+
+        try:
+            with open(results_dir / f"{summary_filename}.json", "w") as f:
+                json.dump(results.to_dict(), f, indent=4, default=str)
+        except Exception as e:
+            print(results.to_dict())
+            raise e
+
+        try:
+            with open(results_dir / f"{individual_responses_filename}.json", "w") as f:
+                json.dump(individual_responses, f, indent=4)
+        except Exception as e:
+            print(individual_responses)
+            raise e
+
+
+parser = argparse.ArgumentParser(
+    description="Run a token throughput and latency benchmark."
+)
+
+parser.add_argument(
+    "--model", type=str, required=True, help="The model to use for this load test."
+)
+parser.add_argument(
+    "--mean-input-tokens",
+    type=int,
+    default=550,
+    help=(
+        "The mean number of tokens to send in the prompt for the request. "
+        "(default: %(default)s)"
+    ),
+)
+parser.add_argument(
+    "--stddev-input-tokens",
+    type=int,
+    default=150,
+    help=(
+        "The standard deviation of the number of tokens to send in the prompt for the request. "
+        "(default: %(default)s)"
+    ),
+)
+parser.add_argument(
+    "--mean-output-tokens",
+    type=int,
+    default=150,
+    help=(
+        "The mean number of tokens to generate from each llm request. This is the max_tokens param "
+        "for the completions API. Note that this is not always the number of tokens returned. "
+        "(default: %(default)s)"
+    ),
+)
+parser.add_argument(
+    "--stddev-output-tokens",
+    type=int,
+    default=80,
+    help=(
+        "The standard deviation of the number of tokens to generate per llm request. "
+        "(default: %(default)s)"
+    ),
+)
+parser.add_argument(
+    "--num-concurrent-requests",
+    type=int,
+    default=10,
+    help=("The number of concurrent requests to send (default: %(default)s)"),
+)
+parser.add_argument(
+    "--timeout",
+    type=int,
+    default=90,
+    help="The amount of time to run the load test for. (default: %(default)s)",
+)
+parser.add_argument(
+    "--max-num-completed-requests",
+    type=int,
+    default=10,
+    help=(
+        "The number of requests to complete before finishing the test. Note "
+        "that it's possible for the test to time out first. (default: %(default)s)"
+    ),
+)
+parser.add_argument(
+    "--additional-sampling-params",
+    type=str,
+    default="{}",
+    help=(
+        "Additional sampling params to send with each request to the LLM API. "
+        "(default: %(default)s) By default, no additional sampling params are sent."
+    ),
+)
+parser.add_argument(
+    "--results-dir",
+    type=str,
+    default="",
+    help=(
+        "The directory to save the results to. "
+        "(default: %(default)s) If not set, no results are saved."
+    ),
+)
+parser.add_argument(
+    "--llm-api",
+    type=str,
+    default="openai",
+    help=(
+        f"The name of the llm api to use. Can select from {SUPPORTED_APIS}"
+        " (default: %(default)s)"
+    ),
+)
+parser.add_argument(
+    "--metadata",
+    type=str,
+    default="",
+    help=(
+        "A comma-separated list of metadata to include in the results, e.g. "
+        "name=foo,bar=1. These will be added to the metadata field of the results."
+    ),
+)
+
+if __name__ == "__main__":
+    env_vars = dict(os.environ)
+    ray.init(runtime_env={"env_vars": env_vars})
+    args = parser.parse_args()
+
+    # Parse user metadata.
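+    # For example, --metadata "name=llama2-70b,cluster=a10g" (hypothetical
+    # values) becomes {"name": "llama2-70b", "cluster": "a10g"} in the results.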
+ user_metadata = {} + if args.metadata: + for item in args.metadata.split(","): + key, value = item.split("=") + user_metadata[key] = value + + run_token_benchmark( + llm_api=args.llm_api, + model=args.model, + test_timeout_s=args.timeout, + max_num_completed_requests=args.max_num_completed_requests, + mean_input_tokens=args.mean_input_tokens, + stddev_input_tokens=args.stddev_input_tokens, + mean_output_tokens=args.mean_output_tokens, + stddev_output_tokens=args.stddev_output_tokens, + num_concurrent_requests=args.num_concurrent_requests, + additional_sampling_params=args.additional_sampling_params, + results_dir=args.results_dir, + user_metadata=user_metadata, + ) From 4d1f8efb4d1c3217fc7f0ce1bbb4617d3f40173d Mon Sep 17 00:00:00 2001 From: Avnish Narayan Date: Mon, 4 Dec 2023 14:22:51 -0800 Subject: [PATCH 2/3] Add notice back in Signed-off-by: Avnish Narayan --- NOTICE.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 NOTICE.txt diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 0000000..4820e73 --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,14 @@ +[Project Name] +Copyright 2023-onwards Anyscale, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. \ No newline at end of file From 1e42cb3fc45134ab25d1994b4e2d87b9fcbdd5f5 Mon Sep 17 00:00:00 2001 From: Avnish Narayan Date: Mon, 4 Dec 2023 18:43:10 -0800 Subject: [PATCH 3/3] Merge master Signed-off-by: Avnish Narayan --- configs.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 configs.py diff --git a/configs.py b/configs.py deleted file mode 100644 index ac8e11d..0000000 --- a/configs.py +++ /dev/null @@ -1,29 +0,0 @@ -from dataclasses import dataclass -from enum import Enum - -class Framework(Enum): - ANYSCALE = "anyscale" - OPENAI = "openai" - FIREWORKS = "fireworks" - VERTEXAI = "vertexai" - SAGEMAKER = "sagemaker" - PERPLEXITY = "perplexity" - TOGETHER = "together" - VLLM = "vllm" - TGI = "tgi" - - # helper method to get the list of values/ supported frameworks - @classmethod - def list(cls): - return list(map(lambda c: c.value, cls)) - -# One class for all endpoint configs -@dataclass -class EndpointConfig: - framework: Framework - api_base: str = None - api_key: str = None - model: str = None - region: str = None # Used by SageMaker - endpoint_name: str = None # Used by SageMaker - project_id: str = None # Used by VertexAI