Commit 7897ba2 (parent: 7d60c66): 29 changed files with 1,984 additions and 0 deletions.
Dockerfile
@@ -0,0 +1,71 @@
ARG PYVERSION=py38
FROM baseten/truss-server-base:3.8-gpu-v0.4.9

ENV PYTHON_EXECUTABLE python3

RUN grep -w 'ID=debian\|ID_LIKE=debian' /etc/os-release || { echo "ERROR: Supplied base image is not a debian image"; exit 1; }
RUN $PYTHON_EXECUTABLE -c "import sys; sys.exit(0) if sys.version_info.major == 3 and sys.version_info.minor >= 8 and sys.version_info.minor <= 11 else sys.exit(1)" \
    || { echo "ERROR: Supplied base image does not have 3.8 <= python <= 3.11"; exit 1; }

RUN pip install --upgrade pip --no-cache-dir

# If a user base image is supplied in config, apply build commands from the Truss base image
RUN apt-get update
RUN apt-get install -y socat
RUN apt-get install -y curl

COPY ./requirements.txt requirements.txt
RUN pip install -r requirements.txt --no-cache-dir

ENV APP_HOME /app
WORKDIR $APP_HOME

# Copy data before code for better caching
COPY ./data /app/data

COPY ./server /app
COPY ./start.sh /app

COPY ./config.yaml /app/config.yaml

## RUN cd /app/data && curl -O https://huggingface.co/codellama/CodeLlama-7b-hf/resolve/main/pytorch_model-00001-of-00003.bin && curl -O https://huggingface.co/codellama/CodeLlama-7b-hf/resolve/main/pytorch_model-00002-of-00003.bin && curl -O https://huggingface.co/codellama/CodeLlama-7b-hf/resolve/main/pytorch_model-00003-of-00003.bin

COPY ./model /app/model
## RUN apt-get install git

## RUN cd /app/data && git clone https://huggingface.co/codellama/CodeLlama-7b-hf/resolve/main/pytorch_model-00001-of-00003.bin && git clone https://huggingface.co/codellama/CodeLlama-7b-hf/resolve/main/pytorch_model-00002-of-00003.bin && git clone https://huggingface.co/codellama/CodeLlama-7b-hf/resolve/main/pytorch_model-00003-of-00003.bin

ENV INFERENCE_SERVER_PORT 8080
ENV SERVER_START_CMD="python3 /app/inference_server.py"
CMD bash start.sh

## ENTRYPOINT ["python3", "/app/inference_server.py"]
README.md
@@ -0,0 +1,89 @@
[![Deploy to Baseten](https://user-images.githubusercontent.com/2389286/236301770-16f46d4f-4e23-4db5-9462-f578ec31e751.svg)](https://app.baseten.co/explore/alpaca)

# Alpaca-7B Truss

This is a [Truss](https://truss.baseten.co/) for Alpaca-7B, a fine-tuned variant of LLaMA-7B. LLaMA is a family of language models released by Meta. This README walks you through deploying this Truss on Baseten to get your own instance of Alpaca-7B.

## Truss

Truss is an open-source model serving framework developed by Baseten. It allows you to develop and deploy machine learning models onto Baseten (and other platforms like [AWS](https://truss.baseten.co/deploy/aws) or [GCP](https://truss.baseten.co/deploy/gcp)). Using Truss, you can develop a GPU model using [live-reload](https://baseten.co/blog/technical-deep-dive-truss-live-reload), package models and their associated code, create Docker containers, and deploy on Baseten.

## Deploying Alpaca-7B

To deploy the Alpaca-7B Truss, follow these steps:

1. __Prerequisites__: Make sure you have a Baseten account and API key. You can sign up for a Baseten account [here](https://app.baseten.co/signup).

2. __Install Truss and the Baseten Python client__: If you haven't already, install the Baseten Python client and Truss in your development environment:
```
pip install --upgrade baseten truss
```

3. __Load the Alpaca-7B Truss__: Assuming you've cloned this repo, spin up an IPython shell and load the Truss into memory:
```
import truss
alpaca7b_truss = truss.load("path/to/alpaca7b_truss")
```

4. __Log in to Baseten__: Log in to your Baseten account using your API key (found [here](https://app.baseten.co/settings/account/api_keys)):
```
import baseten
baseten.login("PASTE_API_KEY_HERE")
```

5. __Deploy the Alpaca-7B Truss__: Deploy the Alpaca-7B Truss to Baseten with the following command:
```
baseten.deploy(alpaca7b_truss)
```

Once your Truss is deployed, you can start using the Alpaca-7B model through the Baseten platform! Navigate to the Baseten UI to watch the model build and deploy, and invoke it via the REST API.

## Alpaca-7B API documentation

This section provides an overview of the Alpaca-7B API, its parameters, and how to use it. The API consists of a single route named `predict`, which you can invoke to generate text based on the provided instruction.

### API route: `predict`

The `predict` route is the primary method for generating text completions from a given instruction. It takes several parameters:

- __instruction__: The input text that you want the model to generate a response for.
- __temperature__ (optional, default=0.1): Controls the randomness of the generated text. Higher values produce more diverse results, while lower values produce more deterministic results.
- __top_p__ (optional, default=0.75): The cumulative probability threshold for nucleus sampling. The model samples only from the smallest set of tokens whose cumulative probability exceeds this threshold.
- __top_k__ (optional, default=40): The number of top tokens to consider when sampling. The model will only consider the `top_k` highest-probability tokens.
- __num_beams__ (optional, default=4): The number of beams used for beam search. Increasing this value can produce higher-quality output but increases the computational cost.

The API also supports passing any parameter supported by Hugging Face's `transformers` `generate` method, as sketched below.
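As a rough illustration of how these parameters behave, here is a minimal, hypothetical sketch that calls `generate` directly on the base checkpoint with the same defaults; the deployed model's actual serving code may differ:
```
# Illustrative only: map the API parameters above onto transformers' generate.
from transformers import LlamaForCausalLM, LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained("decapoda-research/llama-7b-hf")

inputs = tokenizer("What is the meaning of life?", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    do_sample=True,      # enable sampling so temperature/top_p/top_k take effect
    temperature=0.1,     # randomness of sampling
    top_p=0.75,          # nucleus sampling threshold
    top_k=40,            # consider only the 40 highest-probability tokens
    num_beams=4,         # beam search width
    max_new_tokens=128,  # assumed cap on generated length
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```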
## Example usage

You can use the `baseten` model package to invoke your model from Python:
```
import baseten
# You can retrieve your deployed model ID from the UI
model = baseten.deployed_model_version_id('YOUR_MODEL_ID')
request = {
    "prompt": "What's the meaning of life?",
    "temperature": 0.1,
    "top_p": 0.75,
    "top_k": 40,
    "num_beams": 4,
}
response = model.predict(request)
```

You can also invoke your model via a REST API:
```
curl -X POST "https://app.baseten.co/models/YOUR_MODEL_ID/predict" \
     -H "Content-Type: application/json" \
     -H 'Authorization: Api-Key {YOUR_API_KEY}' \
     -d '{
       "prompt": "What'\''s the meaning of life?",
       "temperature": 0.1,
       "top_p": 0.75,
       "top_k": 40,
       "num_beams": 4
     }'
```
config.yaml
@@ -0,0 +1,64 @@
apply_library_patches: true
base_image: null
build:
  arguments: {}
  model_server: TrussServer
bundled_packages_dir: packages
data_dir: data
description: Generate text from an instructional prompt with this tuned version
  of LLaMA 7B.
environment_variables: {}
examples_filename: examples.yaml
external_data:
- backend: http_public
  local_data_path: pytorch_model-00001-of-00002.bin
  url: https://baseten-public.s3.us-west-2.amazonaws.com/models/alpaca/pytorch_model-00001-of-00002.bin
- backend: http_public
  local_data_path: pytorch_model-00002-of-00002.bin
  url: https://baseten-public.s3.us-west-2.amazonaws.com/models/alpaca/pytorch_model-00002-of-00002.bin
external_package_dirs: []
hf_cache: null
input_type: Any
live_reload: false
model_class_filename: model.py
model_class_name: Model
model_framework: custom
model_metadata:
  avatar_url: https://cdn.baseten.co/production/static/explore/meta.png
  cover_image_url: https://cdn.baseten.co/production/static/explore/alpaca.png
  example_model_input:
    num_beams: 4
    prompt: What's the meaning of life?
    temperature: 0.1
    top_p: 0.75
  tags:
  - text-generation
model_module_dir: model
model_name: Alpaca 7B
model_type: custom
python_version: py38
requirements:
- torch==2.0.1
- peft==0.3.0
- sentencepiece==0.1.99
- git+https://github.com/huggingface/transformers.git
resources:
  accelerator: A10G
  cpu: '3'
  memory: 14Gi
  use_gpu: true
runtime:
  predict_concurrency: 1
secrets: {}
spec_version: '2.0'
system_packages: []
train:
  resources:
    accelerator: null
    cpu: 500m
    memory: 512Mi
    use_gpu: false
  training_class_filename: train.py
  training_class_name: Train
  training_module_dir: train
  variables: {}
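The two `external_data` entries above are weight shards that Truss downloads into the data directory at build time. Purely as an illustration (you would not normally do this by hand), the same files could be fetched locally like so:
```
# Hypothetical manual download of the external_data shards; Truss normally
# resolves these URLs into the data directory during the image build.
import urllib.request

BASE = "https://baseten-public.s3.us-west-2.amazonaws.com/models/alpaca"
for name in ("pytorch_model-00001-of-00002.bin", "pytorch_model-00002-of-00002.bin"):
    urllib.request.urlretrieve(f"{BASE}/{name}", name)
```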
config.json
@@ -0,0 +1,24 @@
{
  "_name_or_path": "decapoda-research/llama-7b-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 0,
  "eos_token_id": 1,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "pad_token_id": -1,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.29.0.dev0",
  "use_cache": true,
  "vocab_size": 32000
}
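Since this is a standard Hugging Face model config, a quick sanity check of the architecture it describes (a sketch, assuming the file sits in the current directory) might look like:
```
# Illustrative sketch: parse the config above with transformers and confirm
# the LLaMA-7B shape it declares (32 layers, 32 heads, 4096 hidden dim).
from transformers import LlamaConfig

config = LlamaConfig.from_json_file("config.json")
assert config.num_hidden_layers == 32
assert config.num_attention_heads == 32
assert config.hidden_size == 4096
print(config.model_type)  # "llama"
```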
generation_config.json
@@ -0,0 +1,7 @@
{
  "_from_model_config": true,
  "bos_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.29.0.dev0"
}
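These are the defaults that `transformers` applies when `generate` is called without explicit arguments. A minimal sketch of loading them, assuming the file is in the current directory:
```
# Illustrative sketch: load the generation defaults shown above.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained(".")  # reads ./generation_config.json
print(gen_config.bos_token_id, gen_config.eos_token_id, gen_config.pad_token_id)  # 0 1 0
```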