From 0c2841d71690d824cba9b311be0417dc53ba7d34 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Thu, 25 May 2023 11:42:00 +0800
Subject: [PATCH] chore: Add chatglm 6b (#32)

Signed-off-by: Ce Gao
---
 .github/workflows/docker-publish.yml |  2 +
 .github/workflows/gcr.yml            |  2 +
 README.md                            | 15 +++---
 images/chatglm-6b/Dockerfile         | 75 ++++++++++++++++++++++++++++
 main.py                              | 19 +++++--
 5 files changed, 100 insertions(+), 13 deletions(-)
 create mode 100644 images/chatglm-6b/Dockerfile

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index b687586..18d98ca 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -25,6 +25,8 @@ jobs:
         image:
           - name: modelzai/llm-chatglm-6b-int4
             dockerfile: ./images/base/Dockerfile
+          - name: modelzai/llm-chatglm-6b
+            dockerfile: ./images/chatglm-6b/Dockerfile
           - name: modelzai/llm-llama-7b
             dockerfile: ./images/llama-7b/Dockerfile
           # - name: modelzai/llm-fastchat-t5-3b
diff --git a/.github/workflows/gcr.yml b/.github/workflows/gcr.yml
index d73d57e..c9f483c 100644
--- a/.github/workflows/gcr.yml
+++ b/.github/workflows/gcr.yml
@@ -25,6 +25,8 @@ jobs:
         image:
           - name: modelzai/llm-chatglm-6b-int4
             dockerfile: ./images/base/Dockerfile
+          - name: modelzai/llm-chatglm-6b
+            dockerfile: ./images/chatglm-6b/Dockerfile
           - name: modelzai/llm-llama-7b
             dockerfile: ./images/llama-7b/Dockerfile
           # - name: modelzai/llm-fastchat-t5-3b
diff --git a/README.md b/README.md
index 66ebd81..f53a7c4 100644
--- a/README.md
+++ b/README.md
@@ -33,17 +33,16 @@ python main.py
 
 Currently, we support the following models:
 
-| Model Name | Model (`MODELZ_MODEL`) | Tokenizer (`MODELZ_TOKENIZER`) |
-| ---------- | ------------ | ---------------- |
-| Vicuna 7B Delta V1.1 | `lmsys/vicuna-7b-delta-v1.1` | `lmsys/vicuna-7b-delta-v1.1` |
-| LLaMA 7B | `decapoda-research/llama-7b-hf` | `decapoda-research/llama-7b-hf` |
-| ChatGLM 6B INT4 | `THUDM/chatglm-6b-int4` | `THUDM/chatglm-6b-int4` |
-| ChatGLM 6B | `THUDM/chatglm-6b` | `THUDM/chatglm-6b` |
+| Model Name | Model (`MODELZ_MODEL`) | Docker Image |
+| ---------- | ----------- | ---------------- |
+| Vicuna 7B Delta V1.1 | `lmsys/vicuna-7b-delta-v1.1` | [modelzai/llm-vicuna-7b](https://hub.docker.com/repository/docker/modelzai/llm-vicuna-7b/general) |
+| LLaMA 7B | `decapoda-research/llama-7b-hf` | [modelzai/llm-llama-7b](https://hub.docker.com/repository/docker/modelzai/llm-llama-7b/general) |
+| ChatGLM 6B INT4 | `THUDM/chatglm-6b-int4` | [modelzai/llm-chatglm-6b-int4](https://hub.docker.com/repository/docker/modelzai/llm-chatglm-6b-int4/general) |
+| ChatGLM 6B | `THUDM/chatglm-6b` | [modelzai/llm-chatglm-6b](https://hub.docker.com/repository/docker/modelzai/llm-chatglm-6b/general) |
 
-
-You could set the `MODELZ_MODEL` and `MODELZ_TOKENIZER` environment variables to specify the model and tokenizer.
+You can set the `MODELZ_MODEL` environment variable to specify the model; the tokenizer defaults to the same name.
 
 ### Use OpenAI python SDK
diff --git a/images/chatglm-6b/Dockerfile b/images/chatglm-6b/Dockerfile
new file mode 100644
index 0000000..572d55b
--- /dev/null
+++ b/images/chatglm-6b/Dockerfile
@@ -0,0 +1,75 @@
+ARG base=nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
+
+FROM ${base}
+
+ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
+ENV PATH /opt/conda/bin:$PATH
+
+ENV MODELZ_MODEL=THUDM/chatglm-6b
+ENV MODELZ_TOKENIZER=THUDM/chatglm-6b
+
+ARG MOSEC_PORT=8080
+ENV MOSEC_PORT=${MOSEC_PORT}
+
+ARG CONDA_VERSION=py310_22.11.1-1
+
+RUN apt update && \
+    apt install -y --no-install-recommends \
+    wget \
+    build-essential \
+    ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN set -x && \
+    UNAME_M="$(uname -m)" && \
+    if [ "${UNAME_M}" = "x86_64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \
+        SHA256SUM="00938c3534750a0e4069499baf8f4e6dc1c2e471c86a59caa0dd03f4a9269db6"; \
+    elif [ "${UNAME_M}" = "s390x" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \
+        SHA256SUM="a150511e7fd19d07b770f278fb5dd2df4bc24a8f55f06d6274774f209a36c766"; \
+    elif [ "${UNAME_M}" = "aarch64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \
+        SHA256SUM="48a96df9ff56f7421b6dd7f9f71d548023847ba918c3826059918c08326c2017"; \
+    elif [ "${UNAME_M}" = "ppc64le" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \
+        SHA256SUM="4c86c3383bb27b44f7059336c3a46c34922df42824577b93eadecefbf7423836"; \
+    fi && \
+    wget "${MINICONDA_URL}" -O miniconda.sh -q && \
+    echo "${SHA256SUM} miniconda.sh" > shasum && \
+    if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \
+    mkdir -p /opt && \
+    bash miniconda.sh -b -p /opt/conda && \
+    rm miniconda.sh shasum && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc && \
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    /opt/conda/bin/conda clean -afy
+
+RUN conda create -n envd python=3.9
+
+ENV ENVD_PREFIX=/opt/conda/envs/envd/bin
+
+RUN update-alternatives --install /usr/bin/python python ${ENVD_PREFIX}/python 1 && \
+    update-alternatives --install /usr/bin/python3 python3 ${ENVD_PREFIX}/python3 1 && \
+    update-alternatives --install /usr/bin/pip pip ${ENVD_PREFIX}/pip 1 && \
+    update-alternatives --install /usr/bin/pip3 pip3 ${ENVD_PREFIX}/pip3 1
+
+COPY requirements.txt /
+
+RUN pip install -r requirements.txt
+
+RUN mkdir -p /workspace
+
+COPY main.py workspace/
+
+WORKDIR /workspace
+
+# RUN python main.py --dry-run
+
+# # disable huggingface update check (could be very slow)
+# ENV HF_HUB_OFFLINE=true
+
+ENTRYPOINT [ "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080", "--workers", "1" ]
diff --git a/main.py b/main.py
index 504135c..059d6d7 100644
--- a/main.py
+++ b/main.py
@@ -38,11 +38,17 @@ class LLM:
     def __init__(self, model_name: str, tokenizer_name: str) -> None:
+        # Use the same tokenizer as the model
+        if tokenizer_name is None:
+            tokenizer_name = model_name
+
         self.tokenizer = transformers.AutoTokenizer.from_pretrained(
             tokenizer_name, trust_remote_code=True
         )
-        model_cls = getattr(transformers, LanguageModels.transformer_cls(model_name))
-        self.model = model_cls.from_pretrained(model_name, trust_remote_code=True)
+        model_cls = getattr(
+            transformers, LanguageModels.transformer_cls(model_name))
+        self.model = model_cls.from_pretrained(
+            model_name, trust_remote_code=True)
         self.device = (
             torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
         )
@@ -90,7 +96,8 @@ async def on_post(self, req: Request, resp: Response):
             logger.info(f"Failed to parse request: {err}")
             # return 400 otherwise the client will retry
             resp.status = falcon.HTTP_400
-            resp.data = ErrorResponse.from_validation_err(err, str(buf)).to_json()
+            resp.data = ErrorResponse.from_validation_err(
+                err, str(buf)).to_json()
             return
 
         tokens = llm.encode(chat_req.get_prompt(self.model_name))
@@ -104,7 +111,8 @@ async def on_post(self, req: Request, resp: Response):
             model=self.model_name,
             created=datetime.now(),
             choices=[
-                ChatChoice(message=ChatMessage(content=msg, role=Role.ASSISTANT)),
+                ChatChoice(message=ChatMessage(
+                    content=msg, role=Role.ASSISTANT)),
             ],
             usage=TokenUsage(
                 prompt_tokens=input_length,
@@ -127,7 +135,8 @@ async def on_post(self, req: Request, resp: Response):
             logger.info(f"Failed to parse request: {err}")
             # return 400 otherwise the client will retry
             resp.status = falcon.HTTP_400
-            resp.data = ErrorResponse.from_validation_err(err, str(buf)).to_json()
+            resp.data = ErrorResponse.from_validation_err(
+                err, str(buf)).to_json()
             return
 
         tokens = llm.encode(prompt_req.get_prompt())
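
Below is a minimal usage sketch (not part of the patch) for the new `modelzai/llm-chatglm-6b` image, following the README's "Use OpenAI python SDK" section. It assumes the container has been started with something like `docker run --gpus all -p 8080:8080 modelzai/llm-chatglm-6b` (port 8080 comes from the Dockerfile's ENTRYPOINT), that the server exposes OpenAI-compatible chat routes at the root path, and that the pre-1.0 `openai` SDK is installed; the `api_base` value and the placeholder API key are illustrative assumptions, not confirmed by the patch.

```python
# Illustrative sketch: assumes a local modelzai/llm-chatglm-6b container listening on
# port 8080 and the pre-1.0 openai SDK (openai.ChatCompletion.create style API).
import openai

openai.api_base = "http://localhost:8080"  # assumed base URL; adjust if the server mounts /v1
openai.api_key = "any-string"              # assumed: the local server does not validate keys

resp = openai.ChatCompletion.create(
    model="THUDM/chatglm-6b",  # matches the MODELZ_MODEL baked into the image
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(resp["choices"][0]["message"]["content"])
```

Because the image bakes `MODELZ_MODEL`/`MODELZ_TOKENIZER` in as defaults and `main.py` now falls back to the model name when no tokenizer is given, no extra environment variables should be needed for this model; overriding `MODELZ_MODEL` at `docker run` time would switch to another supported model from the README table.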