From 6062c89cf764b3000395747f666a381b0df6bbd9 Mon Sep 17 00:00:00 2001 From: Logan Adams <114770087+loadams@users.noreply.github.com> Date: Mon, 26 Feb 2024 10:10:18 -0800 Subject: [PATCH 1/5] Remove references to --extra-index-url in MII repo (#421) --- mii/aml_related/templates.py | 3 ++- mii/legacy/aml_related/templates.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mii/aml_related/templates.py b/mii/aml_related/templates.py index 66de070c..71f1cb44 100644 --- a/mii/aml_related/templates.py +++ b/mii/aml_related/templates.py @@ -162,7 +162,8 @@ CONDA_DEFAULT_ENV=amlenv \ PATH=$PATH:/usr/local/cuda/bin -RUN /opt/miniconda/envs/amlenv/bin/pip install -r "$BUILD_DIR/requirements.txt" --extra-index-url https://download.pytorch.org/whl/cu113 && \ +RUN /opt/miniconda/envs/amlenv/bin/pip install torch torchvision --index-url https://download.pytorch.org/whl/cu113 && \ + /opt/miniconda/envs/amlenv/bin/pip install -r "$BUILD_DIR/requirements.txt" && \ /opt/miniconda/envs/amlenv/bin/pip install azureml-inference-server-http && \ /opt/miniconda/envs/amlenv/bin/pip install git+https://github.com/microsoft/DeepSpeed.git && \ /opt/miniconda/envs/amlenv/bin/pip install git+https://github.com/microsoft/DeepSpeed-MII.git && \ diff --git a/mii/legacy/aml_related/templates.py b/mii/legacy/aml_related/templates.py index 66de070c..71f1cb44 100644 --- a/mii/legacy/aml_related/templates.py +++ b/mii/legacy/aml_related/templates.py @@ -162,7 +162,8 @@ CONDA_DEFAULT_ENV=amlenv \ PATH=$PATH:/usr/local/cuda/bin -RUN /opt/miniconda/envs/amlenv/bin/pip install -r "$BUILD_DIR/requirements.txt" --extra-index-url https://download.pytorch.org/whl/cu113 && \ +RUN /opt/miniconda/envs/amlenv/bin/pip install torch torchvision --index-url https://download.pytorch.org/whl/cu113 && \ + /opt/miniconda/envs/amlenv/bin/pip install -r "$BUILD_DIR/requirements.txt" && \ /opt/miniconda/envs/amlenv/bin/pip install azureml-inference-server-http && \ /opt/miniconda/envs/amlenv/bin/pip install git+https://github.com/microsoft/DeepSpeed.git && \ /opt/miniconda/envs/amlenv/bin/pip install git+https://github.com/microsoft/DeepSpeed-MII.git && \ From 429bc5caffef65bee0da1a36f3cf5d8998fcf748 Mon Sep 17 00:00:00 2001 From: Michael Wyatt Date: Thu, 7 Mar 2024 17:25:49 -0800 Subject: [PATCH 2/5] Add quantization config option (#433) Co-authored-by: ZHENG, Zhen Co-authored-by: Logan Adams --- mii/api.py | 1 + mii/config.py | 13 +++++++++++++ requirements/requirements.txt | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/mii/api.py b/mii/api.py index d46ded1d..d909c837 100644 --- a/mii/api.py +++ b/mii/api.py @@ -50,6 +50,7 @@ def _parse_kwargs_to_model_config( # Create the ModelConfig object and return it with remaining kwargs model_config = ModelConfig(**model_config) + return model_config, remaining_kwargs diff --git a/mii/config.py b/mii/config.py index 490947f1..8e9c5cd7 100644 --- a/mii/config.py +++ b/mii/config.py @@ -131,6 +131,12 @@ class ModelConfig(MIIConfigModel): `inference_engine_config`. """ + quantization_mode: Optional[str] = None + """ + The quantization mode in string format. The supported modes are as follows: + - 'wf6af16', weight-only quantization with FP6 weight and FP16 activation. + """ + inference_engine_config: RaggedInferenceEngineConfig = {} """ DeepSpeed inference engine config. This is automatically generated, but you @@ -210,6 +216,13 @@ def propagate_tp_size(cls, values: Dict[str, Any]) -> Dict[str, Any]: values.get("inference_engine_config").tensor_parallel.tp_size = tensor_parallel return values + @root_validator + def propagate_quantization_mode(cls, values: Dict[str, Any]) -> Dict[str, Any]: + quantization_mode = values.get("quantization_mode") + values.get( + "inference_engine_config").quantization.quantization_mode = quantization_mode + return values + @root_validator def check_replica_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: num_replica_config = len(values.get("replica_configs")) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index f067ee09..019fc261 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ asyncio -deepspeed>=0.13.0 +deepspeed>=0.14.0 deepspeed-kernels Flask-RESTful grpcio From 773b735d6294a98dd842d82ef024d0d9b050f66a Mon Sep 17 00:00:00 2001 From: Michael Wyatt Date: Thu, 7 Mar 2024 17:27:47 -0800 Subject: [PATCH 3/5] Update version.txt after 0.2.3 release (#434) Co-authored-by: loadams --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index 71790396..abd41058 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.3 +0.2.4 From 77693eae86b94d00af05bea089a6097702fa0cd3 Mon Sep 17 00:00:00 2001 From: Logan Adams <114770087+loadams@users.noreply.github.com> Date: Wed, 3 Apr 2024 14:12:54 -0700 Subject: [PATCH 4/5] Update GH workflow and workflow runner requirements. (#456) --- .github/workflows/nv-v100-legacy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nv-v100-legacy.yml b/.github/workflows/nv-v100-legacy.yml index 692793dc..426996b3 100644 --- a/.github/workflows/nv-v100-legacy.yml +++ b/.github/workflows/nv-v100-legacy.yml @@ -9,7 +9,7 @@ on: - 'mii/__init__.py' - 'mii/legacy/**' - 'tests/legacy/**' - - '.github/workflows/nv-torch-latest-v100.yml' + - '.github/workflows/nv-v100-legacy.yml' - 'requirements/**' - 'setup.py' @@ -19,7 +19,7 @@ concurrency: jobs: unit-tests: - runs-on: [self-hosted, nvidia, cu116, v100] + runs-on: [self-hosted, nvidia, cu117, v100] steps: - uses: actions/checkout@v4 From 26a853d54b819254857e62bb427db914b1894657 Mon Sep 17 00:00:00 2001 From: Charles Coulombe Date: Wed, 3 Apr 2024 17:51:01 -0400 Subject: [PATCH 5/5] Update pyzmq in requirements.txt (#447) Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com> --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 019fc261..54a8a1f6 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -6,9 +6,9 @@ grpcio grpcio-tools Pillow pydantic +pyzmq safetensors torch transformers ujson Werkzeug -zmq