From 6c71581883be7e9b8c9ed0bb977fd8dbfaf0d99d Mon Sep 17 00:00:00 2001 From: Michael Wyatt Date: Thu, 22 Aug 2024 15:52:00 -0700 Subject: [PATCH] Pydantic v2 migration (#423) Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com> Co-authored-by: Abhishek Kulkarni Co-authored-by: Logan Adams --- docs/requirements.txt | 4 +- mii/api.py | 10 +- mii/backend/client.py | 2 +- mii/backend/server.py | 19 ++- mii/config.py | 125 ++++++++--------- mii/legacy/client.py | 2 +- mii/legacy/config.py | 179 ++++++++++++------------ mii/legacy/deployment.py | 18 +-- mii/legacy/pydantic_v1.py | 16 --- mii/legacy/server.py | 15 +- mii/legacy/utils.py | 2 +- mii/pydantic_v1.py | 16 --- mii/score/generate.py | 2 +- requirements/requirements.txt | 4 +- tests/legacy/test_config.py | 10 +- tests/legacy/test_deployment_options.py | 4 +- tests/test_arg_parsing.py | 24 ++-- 17 files changed, 207 insertions(+), 245 deletions(-) delete mode 100644 mii/legacy/pydantic_v1.py delete mode 100644 mii/pydantic_v1.py diff --git a/docs/requirements.txt b/docs/requirements.txt index e2a2fd67..1afb6a65 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ asyncio -autodoc_pydantic<2.0.0 -deepspeed>=0.13.0 +autodoc_pydantic>=2.0.0 +deepspeed>=0.15.0 grpcio grpcio-tools sphinx==7.1.2 diff --git a/mii/api.py b/mii/api.py index 77ed6e19..841f7624 100644 --- a/mii/api.py +++ b/mii/api.py @@ -39,7 +39,7 @@ def _parse_kwargs_to_model_config( # Fill model_config dict with relevant kwargs, store remaining kwargs in a new dict remaining_kwargs = {} for key, val in kwargs.items(): - if key in ModelConfig.__dict__["__fields__"]: + if key in ModelConfig.model_fields.keys(): if key in model_config: assert ( model_config.get(key) == val @@ -77,7 +77,7 @@ def _parse_kwargs_to_mii_config( # Fill mii_config dict with relevant kwargs, raise error on unknown kwargs for key, val in remaining_kwargs.items(): - if key in MIIConfig.__dict__["__fields__"]: + if key in 
MIIConfig.model_fields.keys(): if key in mii_config: assert ( mii_config.get(key) == val @@ -183,9 +183,9 @@ def serve( mii.aml_related.utils.generate_aml_scripts( acr_name=acr_name, deployment_name=mii_config.deployment_name, - model_name=mii_config.model_config.model, - task_name=mii_config.model_config.task, - replica_num=mii_config.model_config.replica_num, + model_name=mii_config.model_conf.model, + task_name=mii_config.model_conf.task, + replica_num=mii_config.model_conf.replica_num, instance_type=mii_config.instance_type, version=mii_config.version, ) diff --git a/mii/backend/client.py b/mii/backend/client.py index cb4acc17..d946fce6 100644 --- a/mii/backend/client.py +++ b/mii/backend/client.py @@ -37,7 +37,7 @@ class MIIClient: """ def __init__(self, mii_config: MIIConfig, host: str = "localhost") -> None: self.mii_config = mii_config - self.task = mii_config.model_config.task + self.task = mii_config.model_conf.task self.port = mii_config.port_number self.asyncio_loop = asyncio.get_event_loop() channel = create_channel(host, self.port) diff --git a/mii/backend/server.py b/mii/backend/server.py index 02e055d5..ac51a018 100644 --- a/mii/backend/server.py +++ b/mii/backend/server.py @@ -20,7 +20,7 @@ def config_to_b64_str(config: DeepSpeedConfigModel) -> str: # convert json str -> bytes - json_bytes = config.json().encode() + json_bytes = config.model_dump_json().encode() # base64 encoded bytes b64_config_bytes = base64.urlsafe_b64encode(json_bytes) # bytes -> str @@ -31,7 +31,7 @@ class MIIServer: """Initialize the model, setup the server for the model""" def __init__(self, mii_config: MIIConfig) -> None: - self.task = mii_config.model_config.task + self.task = mii_config.model_conf.task self.port_number = mii_config.port_number if not os.path.isfile(mii_config.hostfile): @@ -47,8 +47,7 @@ def __init__(self, mii_config: MIIConfig) -> None: # balancer process, each DeepSpeed model replica, and optionally the # REST API process) processes = 
self._initialize_service(mii_config) - self._wait_until_server_is_live(processes, - mii_config.model_config.replica_configs) + self._wait_until_server_is_live(processes, mii_config.model_conf.replica_configs) def _wait_until_server_is_live(self, processes: List[subprocess.Popen], @@ -143,15 +142,15 @@ def _initialize_service(self, mii_config: MIIConfig) -> List[subprocess.Popen]: ] host_gpus = defaultdict(list) - for repl_config in mii_config.model_config.replica_configs: + for repl_config in mii_config.model_conf.replica_configs: host_gpus[repl_config.hostname].extend(repl_config.gpu_indices) use_multiple_hosts = len( set(repl_config.hostname - for repl_config in mii_config.model_config.replica_configs)) > 1 + for repl_config in mii_config.model_conf.replica_configs)) > 1 # Start replica instances - for repl_config in mii_config.model_config.replica_configs: + for repl_config in mii_config.model_conf.replica_configs: hostfile = tempfile.NamedTemporaryFile(delete=False) hostfile.write( f"{repl_config.hostname} slots={max(host_gpus[repl_config.hostname])+1}\n" @@ -161,7 +160,7 @@ def _initialize_service(self, mii_config: MIIConfig) -> List[subprocess.Popen]: use_multiple_hosts) processes.append( self._launch_server_process( - mii_config.model_config, + mii_config.model_conf, "MII server", ds_launch_str=ds_launch_str, server_args=server_args + [ @@ -175,7 +174,7 @@ def _initialize_service(self, mii_config: MIIConfig) -> List[subprocess.Popen]: # expected to assign one GPU to one process. 
processes.append( self._launch_server_process( - mii_config.model_config, + mii_config.model_conf, "load balancer", server_args=server_args + ["--load-balancer"], )) @@ -183,7 +182,7 @@ def _initialize_service(self, mii_config: MIIConfig) -> List[subprocess.Popen]: if mii_config.enable_restful_api: processes.append( self._launch_server_process( - mii_config.model_config, + mii_config.model_conf, "restful api gateway", server_args=server_args + ["--restful-gateway"], )) diff --git a/mii/config.py b/mii/config.py index 565cdbbc..a1cafb66 100644 --- a/mii/config.py +++ b/mii/config.py @@ -8,27 +8,18 @@ from deepspeed.launcher.runner import DLTS_HOSTFILE, fetch_hostfile from deepspeed.inference import RaggedInferenceEngineConfig +from deepspeed.runtime.config_utils import DeepSpeedConfigModel +from pydantic import Field, model_validator, field_validator from mii.constants import DeploymentType, TaskType, ModelProvider from mii.errors import DeploymentNotFoundError from mii.modeling.tokenizers import MIITokenizerWrapper -from mii.pydantic_v1 import BaseModel, Field, root_validator, validator, Extra -from mii.utils import generate_deployment_name, get_default_task, import_score_file +from mii.utils import generate_deployment_name, import_score_file DEVICE_MAP_DEFAULT = "auto" -class MIIConfigModel(BaseModel): - class Config: - validate_all = True - validate_assignment = True - use_enum_values = True - allow_population_by_field_name = True - extra = "forbid" - arbitrary_types_allowed = True - - -class GenerateParamsConfig(MIIConfigModel): +class GenerateParamsConfig(DeepSpeedConfigModel): """ Options for changing text-generation behavior. """ @@ -39,7 +30,7 @@ class GenerateParamsConfig(MIIConfigModel): max_length: int = 1024 """ Maximum length of ``input_tokens`` + ``generated_tokens``. """ - max_new_tokens: int = None + max_new_tokens: Optional[int] = None """ Maximum number of new tokens generated. ``max_length`` takes precedent. 
""" min_new_tokens: int = 0 @@ -68,24 +59,25 @@ class GenerateParamsConfig(MIIConfigModel): stop: List[str] = [] """ List of strings to stop generation at.""" - @validator("stop", pre=True) + @field_validator("stop", mode="before") + @classmethod def make_stop_string_list(cls, field_value: Union[str, List[str]]) -> List[str]: if isinstance(field_value, str): return [field_value] return field_value - @validator("stop") + @field_validator("stop") + @classmethod def sort_stop_strings(cls, field_value: List[str]) -> List[str]: return sorted(field_value) - @root_validator - def check_prompt_length(cls, values: Dict[str, Any]) -> Dict[str, Any]: - prompt_length = values.get("prompt_length") - max_length = values.get("max_length") - assert max_length > prompt_length, f"max_length ({max_length}) must be greater than prompt_length ({prompt_length})" - return values + @model_validator(mode="after") + def check_prompt_length(self) -> "GenerateParamsConfig": + assert self.max_length > self.prompt_length, f"max_length ({self.max_length}) must be greater than prompt_length ({self.prompt_length})" + return self - @root_validator + @model_validator(mode="before") + @classmethod def set_max_new_tokens(cls, values: Dict[str, Any]) -> Dict[str, Any]: max_length = values.get("max_length") max_new_tokens = values.get("max_new_tokens") @@ -94,19 +86,16 @@ def set_max_new_tokens(cls, values: Dict[str, Any]) -> Dict[str, Any]: values["max_new_tokens"] = max_length - prompt_length return values - class Config: - extra = Extra.forbid - -class ReplicaConfig(MIIConfigModel): +class ReplicaConfig(DeepSpeedConfigModel): hostname: str = "" tensor_parallel_ports: List[int] = [] - torch_dist_port: int = None + torch_dist_port: Optional[int] = None gpu_indices: List[int] = [] - zmq_port: int = None + zmq_port: Optional[int] = None -class ModelConfig(MIIConfigModel): +class ModelConfig(DeepSpeedConfigModel): model_name_or_path: str """ Model name or path of the model to HuggingFace model to be 
deployed. @@ -192,8 +181,9 @@ class ModelConfig(MIIConfigModel): def provider(self) -> ModelProvider: return ModelProvider.HUGGING_FACE - @validator("device_map", pre=True) - def make_device_map_dict(cls, v): + @field_validator("device_map", mode="before") + @classmethod + def make_device_map_dict(cls, v: Any) -> Dict: if isinstance(v, int): return {"localhost": [[v]]} if isinstance(v, list) and isinstance(v[0], int): @@ -202,36 +192,36 @@ def make_device_map_dict(cls, v): return {"localhost": v} return v - @root_validator + @model_validator(mode="before") + @classmethod def auto_fill_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: + assert values.get("model_name_or_path"), "model_name_or_path must be provided" if not values.get("tokenizer"): values["tokenizer"] = values.get("model_name_or_path") - if not values.get("task"): - values["task"] = get_default_task(values.get("model_name_or_path")) + #if not values.get("task"): + # values["task"] = get_default_task(values.get("model_name_or_path")) + values["task"] = TaskType.TEXT_GENERATION return values - @root_validator - def propagate_tp_size(cls, values: Dict[str, Any]) -> Dict[str, Any]: - tensor_parallel = values.get("tensor_parallel") - values.get("inference_engine_config").tensor_parallel.tp_size = tensor_parallel - return values - - @root_validator - def propagate_quantization_mode(cls, values: Dict[str, Any]) -> Dict[str, Any]: - quantization_mode = values.get("quantization_mode") - values.get( - "inference_engine_config").quantization.quantization_mode = quantization_mode - return values + @model_validator(mode="after") + def propagate_tp_size(self) -> "ModelConfig": + self.inference_engine_config.tensor_parallel.tp_size = self.tensor_parallel + return self - @root_validator - def check_replica_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: - num_replica_config = len(values.get("replica_configs")) + @model_validator(mode="after") + def check_replica_config(self) -> "ModelConfig": + 
num_replica_config = len(self.replica_configs) if num_replica_config > 0: - assert num_replica_config == values.get("replica_num"), "Number of replica configs must match replica_num" - return values + assert num_replica_config == self.replica_num, "Number of replica configs must match replica_num" + return self + + @model_validator(mode="after") + def propagate_quantization_mode(self) -> "ModelConfig": + self.inference_engine_config.quantization.quantization_mode = self.quantization_mode + return self -class MIIConfig(MIIConfigModel): +class MIIConfig(DeepSpeedConfigModel): deployment_name: str = "" """ Name of the deployment. Used as an identifier for obtaining a inference @@ -245,7 +235,7 @@ class MIIConfig(MIIConfigModel): * `AML` will generate the assets necessary to deploy on AML resources. """ - model_config: ModelConfig + model_conf: ModelConfig = Field(alias="model_config") """ Configuration for the deployed model(s). """ @@ -290,17 +280,18 @@ class MIIConfig(MIIConfigModel): """ AML instance type to use when create AML deployment assets. """ - @root_validator(skip_on_failure=True) - def AML_name_valid(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if values.get("deployment_type") == DeploymentType.AML: + @model_validator(mode="after") + def AML_name_valid(self) -> "MIIConfig": + if self.deployment_type == DeploymentType.AML: allowed_chars = set(string.ascii_lowercase + string.ascii_uppercase + string.digits + "-") assert ( - set(values.get("deployment_name")) <= allowed_chars + set(self.deployment_name) <= allowed_chars ), "AML deployment names can only contain a-z, A-Z, 0-9, and '-'." 
- return values + return self - @root_validator(skip_on_failure=True) + @model_validator(mode="before") + @classmethod def check_deployment_name(cls, values: Dict[str, Any]) -> Dict[str, Any]: deployment_name = values.get("deployment_name") if not deployment_name: @@ -311,14 +302,14 @@ def check_deployment_name(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values def generate_replica_configs(self) -> None: - if self.model_config.replica_configs: + if self.model_conf.replica_configs: return - torch_dist_port = self.model_config.torch_dist_port - tensor_parallel = self.model_config.tensor_parallel + torch_dist_port = self.model_conf.torch_dist_port + tensor_parallel = self.model_conf.tensor_parallel replica_pool = _allocate_devices(self.hostfile, tensor_parallel, - self.model_config.replica_num, - self.model_config.device_map) + self.model_conf.replica_num, + self.model_conf.device_map) replica_configs = [] for i, (hostname, gpu_indices) in enumerate(replica_pool): # Reserver port for a LB proxy when replication is enabled @@ -332,10 +323,10 @@ def generate_replica_configs(self) -> None: tensor_parallel_ports=tensor_parallel_ports, torch_dist_port=replica_torch_dist_port, gpu_indices=gpu_indices, - zmq_port=self.model_config.zmq_port_number + i, + zmq_port=self.model_conf.zmq_port_number + i, )) - self.model_config.replica_configs = replica_configs + self.model_conf.replica_configs = replica_configs def _allocate_devices(hostfile_path: str, diff --git a/mii/legacy/client.py b/mii/legacy/client.py index 0a03d810..2f299eb1 100644 --- a/mii/legacy/client.py +++ b/mii/legacy/client.py @@ -37,7 +37,7 @@ def mii_query_handle(deployment_name): return MIINonPersistentClient(task, deployment_name) mii_config = _get_mii_config(deployment_name) - return MIIClient(mii_config.model_config.task, + return MIIClient(mii_config.model_conf.task, "localhost", # TODO: This can probably be removed mii_config.port_number) diff --git a/mii/legacy/config.py b/mii/legacy/config.py 
index 793c976f..e149cc7a 100644 --- a/mii/legacy/config.py +++ b/mii/legacy/config.py @@ -5,20 +5,21 @@ import torch import os import string +from pydantic import field_validator, model_validator, Field from typing import List, Optional, Dict, Any -import mii.legacy as mii -from .constants import DeploymentType, TaskType, ModelProvider, MII_MODEL_PATH_DEFAULT -from .pydantic_v1 import validator, root_validator, Field from deepspeed.runtime.config_utils import DeepSpeedConfigModel from deepspeed.inference.config import DtypeEnum from deepspeed.launcher.runner import DLTS_HOSTFILE, fetch_hostfile +import mii.legacy as mii +from .constants import DeploymentType, TaskType, ModelProvider, MII_MODEL_PATH_DEFAULT + class ReplicaConfig(DeepSpeedConfigModel): hostname: str = "" tensor_parallel_ports: List[int] = [] - torch_dist_port: int = None + torch_dist_port: Optional[int] = None gpu_indices: List[int] = [] @@ -39,7 +40,7 @@ class ModelConfig(DeepSpeedConfigModel): 'text-to-image']`` """ - dtype: DtypeEnum = DtypeEnum.fp32 + dtype: torch.dtype = torch.float32 """ Desired model data type, will convert model to this type. Supported target types: `torch.half`, `torch.float`, `torch.int8` (for BLOOM models) @@ -102,9 +103,12 @@ class ModelConfig(DeepSpeedConfigModel): hf_auth_token: Optional[str] = Field( None, - deprecated=True, - deprecated_msg= - "Parameter will be removed. Please use the `pipeline_kwargs` field to pass kwargs to the HuggingFace pipeline creation.", + json_schema_extra={ + "deprecated": + True, + "deprecated_msg": + "Parameter will be removed. Please use the `pipeline_kwargs` field to pass kwargs to the HuggingFace pipeline creation." + }, ) """ HuggingFace authentication token for accessing models. Will be propagated @@ -113,9 +117,12 @@ class ModelConfig(DeepSpeedConfigModel): trust_remote_code: bool = Field( False, - deprecated=True, - deprecated_msg= - "Parameter will be removed. 
Please use the `pipeline_kwargs` field to pass kwargs to the HuggingFace pipeline creation.", + json_schema_extra={ + "deprecated": + True, + "deprecated_msg": + "Parameter will be removed. Please use the `pipeline_kwargs` field to pass kwargs to the HuggingFace pipeline creation." + }, ) """ HuggingFace `tranformer.pipeline` option for `trust_remote_code`. @@ -168,15 +175,13 @@ class ModelConfig(DeepSpeedConfigModel): the input and output tokens. Please consider increasing it to the required token-length required for your use-case. """ - class Config: - json_encoders = {torch.dtype: lambda x: str(x)} - @property def provider(self): return mii.utils.get_provider(self.model, self.task) - @validator("checkpoint_dict") - def checkpoint_dict_valid(cls, field_value, values): + @field_validator("checkpoint_dict", mode="after") + @classmethod + def checkpoint_dict_valid(cls, field_value): if field_value is None: return field_value for k in ["checkpoints", "version", "type", "base_dir"]: @@ -184,51 +189,56 @@ def checkpoint_dict_valid(cls, field_value, values): raise ValueError(f"Missing key={k} in checkpoint_dict") return field_value - @validator("deploy_rank", pre=True) - def deploy_rank_to_list(cls, field_value, values): + @field_validator("deploy_rank", mode="before") + @classmethod + def deploy_rank_to_list(cls, field_value): if field_value and not isinstance(field_value, list): field_value = [field_value] return field_value - @root_validator - def zero_or_meta(cls, values): - if values.get("enable_zero"): - assert not values.get( - "meta_tensor" - ), "ZeRO-Inference does not support meta tensors." 
- return values + @field_validator("dtype", mode="before") + def validate_dtype(cls, field_value, values): + if isinstance(field_value, str): + return DtypeEnum.from_str(field_value).value[0] + if isinstance(field_value, torch.dtype): + return field_value + raise TypeError(f"Invalid type for dtype: {type(field_value)}") - @root_validator - def bloom_model_valid(cls, values): - if "bigscience/bloom" in values.get("model"): + @model_validator(mode="after") + def zero_or_meta(self): + if self.enable_zero: + assert not self.meta_tensor, "ZeRO-Inference does not support meta tensors." + return self + + @model_validator(mode="after") + def bloom_model_valid(self): + if "bigscience/bloom" in self.model: # TODO: SHould be albe to use DtypeEnum here - assert values.get("dtype") in [ + assert self.dtype in [ torch.int8, torch.float16, ], "Bloom models only support fp16/int8." - assert not values.get( - "enable_cuda_graph" - ), "Bloom models do not support CUDA Graph." - return values + assert not self.enable_cuda_graph, "Bloom models do not support CUDA Graph." 
+ return self - @root_validator - def deploy_rank_valid(cls, values): - tensor_parallel = values.get("tensor_parallel") - deploy_rank = values.get("deploy_rank") + @model_validator(mode="after") + def deploy_rank_valid(self): + deploy_rank = self.deploy_rank # if deploy rank is not given, default to align with TP value if deploy_rank is None: - deploy_rank = list(range(tensor_parallel)) + deploy_rank = list(range(self.tensor_parallel)) # number of ranks provided must be equal to TP size, DP is handled outside MII currently - assert tensor_parallel == len( + assert self.tensor_parallel == len( deploy_rank - ), f"{len(deploy_rank)} rank(s) provided in 'deploy_rank' does not align with tensor_parallel size of {tensor_parallel}" + ), f"{len(deploy_rank)} rank(s) provided in 'deploy_rank' does not align with tensor_parallel size of {self.tensor_parallel}" - values["deploy_rank"] = deploy_rank - return values + self.__dict__["deploy_rank"] = deploy_rank + return self - @root_validator + @model_validator(mode="before") + @classmethod def set_model_path(cls, values): model_path = values.get("model_path") if not model_path: @@ -249,54 +259,47 @@ def set_model_path(cls, values): values["model_path"] = model_path return values - @root_validator - def validate_model_and_task(cls, values): - task = values.get("task") - model = values.get("model") - if not values.get("skip_model_check"): - mii.utils.check_if_task_and_model_is_valid(task, model) - if values.get("enable_deepspeed"): - mii.utils.check_if_task_and_model_is_supported(task, model) - # Skip any future checks - values["skip_model_check"] = True - return values + @model_validator(mode="after") + def validate_model_and_task(self): + if not self.skip_model_check: + mii.utils.check_if_task_and_model_is_valid(self.task, self.model) + mii.utils.check_if_task_and_model_is_supported(self.task, self.model) + return self - @root_validator - def meta_tensor_or_sys_mem(cls, values): - if values.get("meta_tensor") and 
values.get("load_with_sys_mem"): + @model_validator(mode="after") + def meta_tensor_or_sys_mem(self): + if self.meta_tensor and self.load_with_sys_mem: raise ValueError( "`meta_tensor` and `load_with_sys_mem` cannot be active at the same time." ) - return values - - @root_validator - def sys_mem_and_diffusers(cls, values): - if values.get("load_with_sys_mem"): - model = values.get("model") - task = values.get("task") - assert not (mii.utils.get_provider(model, task) == ModelProvider.DIFFUSERS), "`load_with_sys_mem` is not support with Stable Diffusion" - return values - - @root_validator - def zero_dtype_valid(cls, values): - if values.get("enable_zero"): - if values.get("ds_config").get("fp16", {}).get("enabled", False): + return self + + @model_validator(mode="after") + def sys_mem_and_diffusers(self): + if self.load_with_sys_mem: + assert not (mii.utils.get_provider(self.model, self.task) == ModelProvider.DIFFUSERS), "`load_with_sys_mem` is not support with Stable Diffusion" + return self + + @model_validator(mode="after") + def zero_dtype_valid(self): + if self.enable_zero: + if self.ds_config.get("fp16", {}).get("enabled", False): # TODO: We should be able to use DtypeEnum instead of torch.float assert ( - values.get("dtype") == torch.float16 + self.dtype == torch.float16 ), "ZeRO FP16 enabled, `dtype` must be set to `torch.float16`" else: assert ( - values.get("dtype") == torch.float32 + self.dtype == torch.float32 ), "ZeRO FP16 disabled, `dtype` must be set to `torch.float32`" - return values + return self - @root_validator - def deepspeed_or_zero(cls, values): + @model_validator(mode="after") + def deepspeed_or_zero(self): assert not ( - values.get("enable_deepspeed") and values.get("enable_zero") + self.enable_deepspeed and self.enable_zero ), "DeepSpeed and ZeRO cannot both be enabled, select only one" - return values + return self class MIIConfig(DeepSpeedConfigModel): @@ -314,7 +317,7 @@ class MIIConfig(DeepSpeedConfigModel): * `AML` will generate the 
assets necessary to deploy on AML resources. """ - model_config: ModelConfig + model_conf: ModelConfig """ Configuration for the deployed model(s). """ @@ -349,23 +352,23 @@ class MIIConfig(DeepSpeedConfigModel): """ AML instance type to use when create AML deployment assets. """ - @root_validator(skip_on_failure=True) - def AML_name_valid(cls, values): - if values.get("deployment_type") == DeploymentType.AML: + @model_validator(mode="after") + def AML_name_valid(self): + if self.deployment_type == DeploymentType.AML: allowed_chars = set(string.ascii_lowercase + string.ascii_uppercase + string.digits + "-") assert ( - set(values.get("deployment_name")) <= allowed_chars + set(self.deployment_name) <= allowed_chars ), "AML deployment names can only contain a-z, A-Z, 0-9, and '-'." - return values + return self def generate_replica_configs(self): # TODO: refactor this function hostfile = self.hostfile port_number = self.port_number - torch_dist_port = self.model_config.torch_dist_port - tensor_parallel = self.model_config.tensor_parallel - replica_num = self.model_config.replica_num + torch_dist_port = self.model_conf.torch_dist_port + tensor_parallel = self.model_conf.tensor_parallel + replica_num = self.model_conf.replica_num replica_pool = _allocate_processes(hostfile, tensor_parallel, replica_num) replica_configs = [] for i, (hostname, gpu_indices) in enumerate(replica_pool): @@ -382,7 +385,7 @@ def generate_replica_configs(self): gpu_indices=gpu_indices, )) - self.model_config.replica_configs = replica_configs + self.model_conf.replica_configs = replica_configs def _allocate_processes(hostfile_path, tensor_parallel, replica_num): diff --git a/mii/legacy/deployment.py b/mii/legacy/deployment.py index 59954901..b8b0753f 100644 --- a/mii/legacy/deployment.py +++ b/mii/legacy/deployment.py @@ -37,7 +37,7 @@ def support_legacy_api( } # TODO do this in a single for loop for key, val in mii_config.items(): - if key not in MIIConfig.__dict__["__fields__"]: + if key not 
in MIIConfig.model_fields.keys(): model_config[key] = val mii_config = { k: v @@ -68,10 +68,10 @@ def deploy( model_config, mii_config = support_legacy_api(*args, **kwargs) mii_config["deployment_name"] = deployment_name - mii_config["model_config"] = model_config + mii_config["model_conf"] = model_config mii_config = mii.config.MIIConfig(**mii_config) - if mii_config.model_config.enable_deepspeed: + if mii_config.model_conf.enable_deepspeed: logger.info( "************* MII is using DeepSpeed Optimizations to accelerate your model *************" ) @@ -100,9 +100,9 @@ def _deploy_aml(mii_config): mii.aml_related.utils.generate_aml_scripts( acr_name=acr_name, deployment_name=mii_config.deployment_name, - model_name=mii_config.model_config.model, - task_name=mii_config.model_config.task, - replica_num=mii_config.model_config.replica_num, + model_name=mii_config.model_conf.model, + task_name=mii_config.model_conf.task, + replica_num=mii_config.model_conf.replica_num, instance_type=mii_config.instance_type, version=mii_config.version, ) @@ -115,10 +115,10 @@ def _deploy_aml(mii_config): def _deploy_nonpersistent(mii_config): assert ( int(os.getenv("WORLD_SIZE", "1")) - == mii_config.model_config.tensor_parallel + == mii_config.model_conf.tensor_parallel ), "World Size does not equal number of tensors. When using non-persistent deployment type, please launch with `deepspeed --num_gpus `" deployment_name = mii_config.deployment_name mii.non_persistent_models[deployment_name] = ( - load_models(mii_config.model_config), - mii_config.model_config.task, + load_models(mii_config.model_conf), + mii_config.model_conf.task, ) diff --git a/mii/legacy/pydantic_v1.py b/mii/legacy/pydantic_v1.py deleted file mode 100644 index 6aba072a..00000000 --- a/mii/legacy/pydantic_v1.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# SPDX-License-Identifier: Apache-2.0 - -# DeepSpeed Team -"""Pydantic v1 compatibility module. 
- -Pydantic v2 introduced breaking changes that hinder its adoption: -https://docs.pydantic.dev/latest/migration/. To provide deepspeed users the option to -migrate to pydantic v2 on their own timeline, deepspeed uses this compatibility module -as a pydantic-version-agnostic alias for pydantic's v1 API. -""" - -try: - from pydantic.v1 import * # noqa: F401 -except ImportError: - from pydantic import * # noqa: F401 diff --git a/mii/legacy/server.py b/mii/legacy/server.py index 8a66f3ec..75ba24fe 100644 --- a/mii/legacy/server.py +++ b/mii/legacy/server.py @@ -28,7 +28,7 @@ class MIIServer: """Initialize the model, setup the server for the model under model_path""" def __init__(self, mii_config): - self.task = mii_config.model_config.task + self.task = mii_config.model_conf.task self.num_gpus = get_num_gpus(mii_config) assert self.num_gpus > 0, "GPU count must be greater than 0" @@ -44,8 +44,7 @@ def __init__(self, mii_config): mii_config.generate_replica_configs() processes = self._initialize_service(mii_config) - self._wait_until_server_is_live(processes, - mii_config.model_config.replica_configs) + self._wait_until_server_is_live(processes, mii_config.model_conf.replica_configs) def _wait_until_server_is_live(self, processes, deployment): for process, repl_config in zip(processes, deployment): @@ -128,11 +127,11 @@ def _initialize_service(self, mii_config): ] host_gpus = defaultdict(list) - for repl_config in mii_config.model_config.replica_configs: + for repl_config in mii_config.model_conf.replica_configs: host_gpus[repl_config.hostname].extend(repl_config.gpu_indices) # Start replica instances - for repl_config in mii_config.model_config.replica_configs: + for repl_config in mii_config.model_conf.replica_configs: hostfile = tempfile.NamedTemporaryFile(delete=False) hostfile.write( f"{repl_config.hostname} slots={max(host_gpus[repl_config.hostname])+1}\n" @@ -140,7 +139,7 @@ def _initialize_service(self, mii_config): ds_launch_str = 
self._generate_ds_launch_str(repl_config, hostfile.name) processes.append( self._launch_server_process( - mii_config.model_config, + mii_config.model_conf, "MII server", ds_launch_str=ds_launch_str, server_args=server_args + @@ -153,7 +152,7 @@ def _initialize_service(self, mii_config): # expected to assign one GPU to one process. processes.append( self._launch_server_process( - mii_config.model_config, + mii_config.model_conf, "load balancer", server_args=server_args + ["--load-balancer"], )) @@ -161,7 +160,7 @@ def _initialize_service(self, mii_config): if mii_config.enable_restful_api: processes.append( self._launch_server_process( - mii_config.model_config, + mii_config.model_conf, "restful api gateway", server_args=server_args + ["--restful-gateway"], )) diff --git a/mii/legacy/utils.py b/mii/legacy/utils.py index f1a7cb59..8d574ad9 100644 --- a/mii/legacy/utils.py +++ b/mii/legacy/utils.py @@ -179,7 +179,7 @@ def extract_query_dict(task, request_dict): def get_num_gpus(mii_config): - num_gpus = mii_config.model_config.tensor_parallel + num_gpus = mii_config.model_conf.tensor_parallel assert ( torch.cuda.device_count() >= num_gpus diff --git a/mii/pydantic_v1.py b/mii/pydantic_v1.py deleted file mode 100644 index 6aba072a..00000000 --- a/mii/pydantic_v1.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# SPDX-License-Identifier: Apache-2.0 - -# DeepSpeed Team -"""Pydantic v1 compatibility module. - -Pydantic v2 introduced breaking changes that hinder its adoption: -https://docs.pydantic.dev/latest/migration/. To provide deepspeed users the option to -migrate to pydantic v2 on their own timeline, deepspeed uses this compatibility module -as a pydantic-version-agnostic alias for pydantic's v1 API. 
-""" - -try: - from pydantic.v1 import * # noqa: F401 -except ImportError: - from pydantic import * # noqa: F401 diff --git a/mii/score/generate.py b/mii/score/generate.py index a34a96c6..978a635b 100644 --- a/mii/score/generate.py +++ b/mii/score/generate.py @@ -19,7 +19,7 @@ def create_score_file(mii_config): score_src = fd.read() # update score file w. global config dict - config_dict = mii_config.dict() + config_dict = mii_config.model_dump() source_with_config = f"{score_src}\n" source_with_config += f"mii_config = {pprint.pformat(config_dict, indent=4)}" diff --git a/requirements/requirements.txt b/requirements/requirements.txt index b4191e29..11cf6b83 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,12 +1,12 @@ accelerate asyncio -deepspeed>=0.14.0 +deepspeed>=0.15.0 deepspeed-kernels Flask-RESTful grpcio grpcio-tools Pillow -pydantic +pydantic>=2.0.0 pyzmq safetensors torch diff --git a/tests/legacy/test_config.py b/tests/legacy/test_config.py index bc2ca1fd..f99b2524 100644 --- a/tests/legacy/test_config.py +++ b/tests/legacy/test_config.py @@ -6,24 +6,24 @@ import pytest import mii.legacy as mii -from mii.legacy import pydantic_v1 +from pydantic import ValidationError @pytest.mark.parametrize("port_number", [12345]) @pytest.mark.parametrize("tensor_parallel", [4]) def test_base_configs(deployment_name, mii_config, model_config): mii_config["deployment_name"] = deployment_name - mii_config["model_config"] = model_config + mii_config["model_conf"] = model_config mii_config = mii.config.MIIConfig(**mii_config) assert mii_config.port_number == 12345 - assert mii_config.model_config.tensor_parallel == 4 + assert mii_config.model_conf.tensor_parallel == 4 @pytest.mark.parametrize("port_number", ["fail"]) @pytest.mark.parametrize("tensor_parallel", [3.5]) def test_base_configs_literalfail(deployment_name, mii_config, model_config): - with pytest.raises(pydantic_v1.ValidationError): + with pytest.raises(ValidationError): 
mii_config["deployment_name"] = deployment_name - mii_config["model_config"] = model_config + mii_config["model_conf"] = model_config mii_config = mii.config.MIIConfig(**mii_config) diff --git a/tests/legacy/test_deployment_options.py b/tests/legacy/test_deployment_options.py index e60ebcd7..2cda7a6f 100644 --- a/tests/legacy/test_deployment_options.py +++ b/tests/legacy/test_deployment_options.py @@ -7,7 +7,7 @@ import json import requests import mii.legacy as mii -from mii.legacy import pydantic_v1 +from pydantic import ValidationError @pytest.mark.deepspeed @@ -81,7 +81,7 @@ def test_zero_config(deployment, query): @pytest.mark.deepspeed -@pytest.mark.parametrize("expected_failure", [pydantic_v1.ValidationError]) +@pytest.mark.parametrize("expected_failure", [ValidationError]) @pytest.mark.parametrize( "enable_deepspeed, enable_zero, dtype", [(True, diff --git a/tests/test_arg_parsing.py b/tests/test_arg_parsing.py index 640512ae..957b1eeb 100644 --- a/tests/test_arg_parsing.py +++ b/tests/test_arg_parsing.py @@ -5,31 +5,33 @@ import pytest +from pydantic import ValidationError + from mii.api import _parse_kwargs_to_model_config, _parse_kwargs_to_mii_config from mii.errors import UnknownArgument def test_model_name_or_path(): # model_name_or_path is required - with pytest.raises(ValueError): + with pytest.raises(ValidationError): _parse_kwargs_to_mii_config() - with pytest.raises(ValueError): + with pytest.raises(ValidationError): _parse_kwargs_to_model_config() # passing model_name_or_path as positional arg mii_config = _parse_kwargs_to_mii_config("test") - assert mii_config.model_config.model_name_or_path == "test" + assert mii_config.model_conf.model_name_or_path == "test" model_config, _ = _parse_kwargs_to_model_config("test") assert model_config.model_name_or_path == "test" # passing model_name_or_path in model_config mii_config = _parse_kwargs_to_mii_config(model_config={"model_name_or_path": "test"}) - assert mii_config.model_config.model_name_or_path == 
"test" + assert mii_config.model_conf.model_name_or_path == "test" mii_config = _parse_kwargs_to_mii_config( mii_config={"model_config": { "model_name_or_path": "test" }}) - assert mii_config.model_config.model_name_or_path == "test" + assert mii_config.model_conf.model_name_or_path == "test" model_config, _ = _parse_kwargs_to_model_config( model_config={"model_name_or_path": "test"} ) @@ -53,8 +55,8 @@ def test_only_kwargs(): mii_config = _parse_kwargs_to_mii_config("test", tensor_parallel=2, enable_restful_api=True) - assert mii_config.model_config.model_name_or_path == "test" - assert mii_config.model_config.tensor_parallel == 2 + assert mii_config.model_conf.model_name_or_path == "test" + assert mii_config.model_conf.tensor_parallel == 2 assert mii_config.enable_restful_api is True model_config, _ = _parse_kwargs_to_model_config("test", tensor_parallel=2) @@ -70,8 +72,8 @@ def test_only_config_dicts(): "tensor_parallel": 2 }, ) - assert mii_config.model_config.model_name_or_path == "test" - assert mii_config.model_config.tensor_parallel == 2 + assert mii_config.model_conf.model_name_or_path == "test" + assert mii_config.model_conf.tensor_parallel == 2 assert mii_config.enable_restful_api is True mii_config = _parse_kwargs_to_mii_config( @@ -82,8 +84,8 @@ def test_only_config_dicts(): "tensor_parallel": 2 }, }) - assert mii_config.model_config.model_name_or_path == "test" - assert mii_config.model_config.tensor_parallel == 2 + assert mii_config.model_conf.model_name_or_path == "test" + assert mii_config.model_conf.tensor_parallel == 2 assert mii_config.enable_restful_api is True model_config, _ = _parse_kwargs_to_model_config(