-
Notifications
You must be signed in to change notification settings - Fork 63
/
__main__.py
348 lines (300 loc) · 13.6 KB
/
__main__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# coding=utf-8
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Entry point to the optimum.exporters.neuron command line."""
import argparse
import inspect
import os
from argparse import ArgumentParser
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
from requests.exceptions import ConnectionError as RequestsConnectionError
from transformers import AutoConfig
from ...neuron.utils import (
DIFFUSION_MODEL_TEXT_ENCODER_2_NAME,
DIFFUSION_MODEL_TEXT_ENCODER_NAME,
DIFFUSION_MODEL_UNET_NAME,
DIFFUSION_MODEL_VAE_DECODER_NAME,
DIFFUSION_MODEL_VAE_ENCODER_NAME,
NEURON_FILE_NAME,
is_neuron_available,
is_neuronx_available,
)
from ...neuron.utils.version_utils import check_compiler_compatibility_for_stable_diffusion
from ...utils import is_diffusers_available, logging
from ...utils.save_utils import maybe_save_preprocessors
from ..error_utils import AtolError, OutputMatchError, ShapeError
from ..tasks import TasksManager
from .convert import export_models, validate_models_outputs
from .model_configs import * # noqa: F403
from .utils import (
build_stable_diffusion_components_mandatory_shapes,
get_stable_diffusion_models_for_export,
replace_stable_diffusion_submodels,
)
if is_neuron_available():
from ...commands.export.neuron import parse_args_neuron
NEURON_COMPILER = "Neuron"
if is_neuronx_available():
from ...commands.export.neuronx import parse_args_neuronx as parse_args_neuron # noqa: F811
NEURON_COMPILER = "Neuronx"
if is_diffusers_available():
from diffusers import StableDiffusionXLPipeline
if TYPE_CHECKING:
if is_diffusers_available():
from diffusers import StableDiffusionPipeline
logger = logging.get_logger()
logger.setLevel(logging.INFO)
def infer_compiler_kwargs(args: argparse.Namespace) -> Dict[str, Any]:
    """Extract the compiler-specific keyword arguments from parsed CLI arguments.

    Args:
        args: Parsed CLI namespace. Must carry `auto_cast` and `auto_cast_type`;
            may also carry `disable_fast_relayout` / `disable_fallback`, which are
            only defined for some compiler backends.

    Returns:
        A dict with `auto_cast` / `auto_cast_type` (both `None` when casting is
        disabled) plus the optional backend flags when present on `args`.
    """
    # "none" on the CLI means auto-casting is disabled entirely.
    auto_cast = None if args.auto_cast == "none" else args.auto_cast
    # The cast dtype is only meaningful when auto-casting is enabled.
    auto_cast_type = None if auto_cast is None else args.auto_cast_type
    compiler_kwargs = {"auto_cast": auto_cast, "auto_cast_type": auto_cast_type}
    # These flags are backend-specific, so they may be absent from the namespace;
    # direct attribute access replaces the needless getattr-with-constant-name.
    if hasattr(args, "disable_fast_relayout"):
        compiler_kwargs["disable_fast_relayout"] = args.disable_fast_relayout
    if hasattr(args, "disable_fallback"):
        compiler_kwargs["disable_fallback"] = args.disable_fallback
    return compiler_kwargs
def infer_task(task: str, model_name_or_path: str) -> str:
    """Resolve the export task, inferring it from the model when set to "auto".

    Args:
        task: Task name from the CLI, or "auto" to infer it via `TasksManager`.
        model_name_or_path: Model id on the Hugging Face Hub (inference only
            works for Hub-hosted models) or a local path.

    Returns:
        The resolved task name (unchanged when `task` is not "auto").

    Raises:
        KeyError: The task could not be inferred from the model.
        RequestsConnectionError: The Hub could not be reached for inference.
    """
    if task == "auto":
        try:
            task = TasksManager.infer_task_from_model(model_name_or_path)
        except KeyError as e:
            # Chain with `from e` so the original traceback is preserved.
            raise KeyError(
                "The task could not be automatically inferred. Please provide the argument --task with the task "
                f"from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
            ) from e
        except RequestsConnectionError as e:
            raise RequestsConnectionError(
                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
            ) from e
    return task
def normalize_input_shapes(task: str, args: argparse.Namespace) -> Dict[str, int]:
    """Collect the mandatory static input shapes for *task* from the CLI arguments.

    The set of required axes (batch size, sequence length, ...) is looked up on
    the neuron exporter config registered for the model's architecture, and each
    axis value is read off the parsed CLI namespace.
    """
    model_config = AutoConfig.from_pretrained(args.model)
    # Exporter configs are keyed by dash-separated model types.
    normalized_type = model_config.model_type.replace("_", "-")
    config_constructor = TasksManager.get_exporter_config_constructor(
        model_type=normalized_type, exporter="neuron", task=task
    )
    required_axes = config_constructor.func.get_mandatory_axes_for_task(task)
    return {axis: getattr(args, axis) for axis in required_axes}
def normalize_stable_diffusion_input_shapes(
    args: argparse.Namespace,
) -> Dict[str, Dict[str, int]]:
    """Build the per-submodel mandatory input shapes for a stable-diffusion export.

    Args:
        args: Parsed CLI namespace (or an equivalent dict) carrying one entry per
            mandatory axis.

    Returns:
        A dict of input-shape dicts, one per pipeline component, as produced by
        `build_stable_diffusion_components_mandatory_shapes`.

    Raises:
        AttributeError: A mandatory axis is missing from `args`.
    """
    args = vars(args) if isinstance(args, argparse.Namespace) else args
    # The mandatory axes are exactly the parameters of the shape-building helper;
    # `.args` directly replaces the needless getattr-with-constant-name.
    mandatory_axes = set(inspect.getfullargspec(build_stable_diffusion_components_mandatory_shapes).args)
    # Remove `sequence_length` as diffusers will pad it to the max and remove number of channels.
    mandatory_axes = mandatory_axes - {
        "sequence_length",
        "unet_num_channels",
        "vae_encoder_num_channels",
        "vae_decoder_num_channels",
        "num_images_per_prompt",  # default to 1
    }
    if not mandatory_axes.issubset(set(args.keys())):
        raise AttributeError(
            f"Shape of {mandatory_axes} are mandatory for neuron compilation, while {mandatory_axes.difference(args.keys())} are not given."
        )
    mandatory_shapes = {name: args[name] for name in mandatory_axes}
    mandatory_shapes["num_images_per_prompt"] = args.get("num_images_per_prompt", 1)
    return build_stable_diffusion_components_mandatory_shapes(**mandatory_shapes)
def infer_stable_diffusion_shapes_from_diffusers(
    input_shapes: Dict[str, Dict[str, int]],
    model: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
):
    """Complete *input_shapes* in place with dimensions read off a loaded pipeline.

    Sequence length comes from whichever tokenizer the pipeline carries, channel
    counts from the unet/vae configs, and the unet / vae-decoder spatial sizes are
    the requested pixel sizes divided by the VAE scale factor.
    """
    if model.tokenizer is not None:
        sequence_length = model.tokenizer.model_max_length
    elif getattr(model, "tokenizer_2", None) is not None:
        # SDXL pipelines may only carry a second tokenizer.
        sequence_length = model.tokenizer_2.model_max_length
    else:
        raise AttributeError(f"Cannot infer sequence_length from {type(model)} as there is no tokenizer as attribute.")

    vae_config = model.vae.config
    # Each VAE block transition downsamples by 2. NOTE(review): the `or 8`
    # fallback can never trigger since the power is always >= 1 — kept as-is.
    vae_scale_factor = 2 ** (len(vae_config.block_out_channels) - 1) or 8

    pixel_height = input_shapes["unet_input_shapes"]["height"]
    pixel_width = input_shapes["unet_input_shapes"]["width"]
    latent_height = pixel_height // vae_scale_factor
    latent_width = pixel_width // vae_scale_factor

    input_shapes["text_encoder_input_shapes"].update({"sequence_length": sequence_length})
    input_shapes["unet_input_shapes"].update(
        {
            "sequence_length": sequence_length,
            "num_channels": model.unet.config.in_channels,
            "height": latent_height,
            "width": latent_width,
        }
    )
    input_shapes["vae_encoder_input_shapes"].update(
        {"num_channels": vae_config.in_channels, "height": pixel_height, "width": pixel_width}
    )
    input_shapes["vae_decoder_input_shapes"].update(
        {"num_channels": vae_config.latent_channels, "height": latent_height, "width": latent_width}
    )
    return input_shapes
def main_export(
    model_name_or_path: str,
    output: Union[str, Path],
    compiler_kwargs: Dict[str, Any],
    task: str = "auto",
    dynamic_batch_size: bool = False,
    atol: Optional[float] = None,
    cache_dir: Optional[str] = None,
    trust_remote_code: bool = False,
    subfolder: str = "",
    revision: str = "main",
    force_download: bool = False,
    local_files_only: bool = False,
    use_auth_token: Optional[Union[bool, str]] = None,
    do_validation: bool = True,
    submodels: Optional[Dict[str, Union[Path, str]]] = None,
    **input_shapes,
):
    """Load a model, compile it (and its submodels) for Neuron, and optionally validate the outputs.

    Args:
        model_name_or_path: Hub model id or local path, passed to `TasksManager.get_model_from_task`.
        output: Directory the compiled artifacts and preprocessors are written to.
        compiler_kwargs: Compiler options forwarded to `export_models` (see `infer_compiler_kwargs`).
        task: Task name, or "auto"; mapped through `TasksManager.map_from_synonym`.
        dynamic_batch_size: Whether to compile with dynamic batching enabled.
        atol: Absolute tolerance for output validation; defaults to the exporter
            config's `ATOL_FOR_VALIDATION` for non-diffusion models.
        cache_dir / trust_remote_code / subfolder / revision / force_download /
        local_files_only / use_auth_token: Standard Hub loading options.
        do_validation: When True, compare compiled outputs against the original model.
        submodels: Optional replacement submodels for stable-diffusion pipelines
            (e.g. a custom unet), consumed by `replace_stable_diffusion_submodels`.
        **input_shapes: Static shapes for compilation (axis name -> size).
    """
    output = Path(output)
    if not output.parent.exists():
        output.parent.mkdir(parents=True)

    task = TasksManager.map_from_synonym(task)

    model_kwargs = {
        "task": task,
        "model_name_or_path": model_name_or_path,
        "subfolder": subfolder,
        "revision": revision,
        "cache_dir": cache_dir,
        "use_auth_token": use_auth_token,
        "local_files_only": local_files_only,
        "force_download": force_download,
        "trust_remote_code": trust_remote_code,
        "framework": "pt",
    }
    model = TasksManager.get_model_from_task(**model_kwargs)

    is_stable_diffusion = "stable-diffusion" in task
    if not is_stable_diffusion:
        # Single-model export: one neuron config, one output file.
        neuron_config_constructor = TasksManager.get_exporter_config_constructor(
            model=model, exporter="neuron", task=task
        )
        neuron_config = neuron_config_constructor(model.config, dynamic_batch_size=dynamic_batch_size, **input_shapes)
        if atol is None:
            # Default tolerance is declared by the exporter config.
            atol = neuron_config.ATOL_FOR_VALIDATION
        model_name = model.name_or_path.split("/")[-1]
        output_model_names = {model_name: "model.neuron"}
        models_and_neuron_configs = {model_name: (model, neuron_config)}
        maybe_save_preprocessors(model, output.parent)

    if is_stable_diffusion:
        model = replace_stable_diffusion_submodels(model, submodels)
        check_compiler_compatibility_for_stable_diffusion()
        if is_neuron_available():
            # Stable diffusion requires neuronx-cc; the inf1 compiler is not supported.
            raise RuntimeError(
                "Stable diffusion export is not supported by neuron-cc on inf1, please use neuronx-cc on either inf2/trn1 instead."
            )
        input_shapes = infer_stable_diffusion_shapes_from_diffusers(input_shapes, model)
        # Saving the model config and preprocessor as this is needed sometimes.
        model.scheduler.save_pretrained(output.joinpath("scheduler"))
        if hasattr(model, "tokenizer") and model.tokenizer is not None:
            model.tokenizer.save_pretrained(output.joinpath("tokenizer"))
        if hasattr(model, "tokenizer_2") and model.tokenizer_2 is not None:
            model.tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
        if hasattr(model, "feature_extractor") and model.feature_extractor is not None:
            model.feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
        model.save_config(output)

        # One neuron config / output file per pipeline component.
        models_and_neuron_configs = get_stable_diffusion_models_for_export(
            pipeline=model,
            task=task,
            dynamic_batch_size=dynamic_batch_size,
            **input_shapes,
        )
        output_model_names = {
            DIFFUSION_MODEL_UNET_NAME: os.path.join(DIFFUSION_MODEL_UNET_NAME, NEURON_FILE_NAME),
            DIFFUSION_MODEL_VAE_ENCODER_NAME: os.path.join(DIFFUSION_MODEL_VAE_ENCODER_NAME, NEURON_FILE_NAME),
            DIFFUSION_MODEL_VAE_DECODER_NAME: os.path.join(DIFFUSION_MODEL_VAE_DECODER_NAME, NEURON_FILE_NAME),
        }
        # Text encoders are optional (e.g. SDXL refiner pipelines lack the first one).
        if hasattr(model, "text_encoder") and model.text_encoder is not None:
            output_model_names[DIFFUSION_MODEL_TEXT_ENCODER_NAME] = os.path.join(
                DIFFUSION_MODEL_TEXT_ENCODER_NAME, NEURON_FILE_NAME
            )
        if hasattr(model, "text_encoder_2") and model.text_encoder_2 is not None:
            output_model_names[DIFFUSION_MODEL_TEXT_ENCODER_2_NAME] = os.path.join(
                DIFFUSION_MODEL_TEXT_ENCODER_2_NAME, NEURON_FILE_NAME
            )
        # The submodels were copied into models_and_neuron_configs; free the pipeline.
        del model

    _, neuron_outputs = export_models(
        models_and_neuron_configs=models_and_neuron_configs,
        output_dir=output,
        output_file_names=output_model_names,
        compiler_kwargs=compiler_kwargs,
    )

    # Validate compiled model
    if do_validation is True:
        if is_stable_diffusion:
            # Do not validate vae encoder due to the sampling randomness
            del neuron_outputs[-2]  # -2 is the index of `vae_encoder`
            models_and_neuron_configs.pop("vae_encoder", None)
            output_model_names.pop("vae_encoder", None)

        try:
            validate_models_outputs(
                models_and_neuron_configs=models_and_neuron_configs,
                neuron_named_outputs=neuron_outputs,
                output_dir=output,
                atol=atol,
                neuron_files_subpaths=output_model_names,
            )

            logger.info(
                f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: " f"{output.as_posix()}"
            )
        except ShapeError as e:
            # Shape mismatches are fatal: the compiled artifact is unusable.
            raise e
        except AtolError as e:
            # Tolerance violations are reported but do not invalidate the export.
            logger.warning(
                f"The {NEURON_COMPILER} export succeeded with the warning: {e}.\n The exported model was saved at: "
                f"{output.as_posix()}"
            )
        except OutputMatchError as e:
            logger.warning(
                f"The {NEURON_COMPILER} export succeeded with the warning: {e}.\n The exported model was saved at: "
                f"{output.as_posix()}"
            )
        except Exception as e:
            # Validation failure is logged, not raised — the export itself succeeded.
            logger.error(
                f"An error occured with the error message: {e}.\n The exported model was saved at: "
                f"{output.as_posix()}"
            )
def main():
    """CLI entry point: parse the exporter arguments and run the Neuron export."""
    parser = ArgumentParser(f"Hugging Face Optimum {NEURON_COMPILER} exporter")
    parse_args_neuron(parser)
    # Retrieve CLI arguments
    cli_args = parser.parse_args()

    resolved_task = infer_task(cli_args.task, cli_args.model)
    compiler_kwargs = infer_compiler_kwargs(cli_args)

    # Stable-diffusion pipelines need per-component shapes and allow submodel overrides.
    if "stable-diffusion" in resolved_task:
        input_shapes = normalize_stable_diffusion_input_shapes(cli_args)
        submodels = {"unet": cli_args.unet}
    else:
        input_shapes = normalize_input_shapes(resolved_task, cli_args)
        submodels = None

    main_export(
        model_name_or_path=cli_args.model,
        output=cli_args.output,
        compiler_kwargs=compiler_kwargs,
        task=resolved_task,
        dynamic_batch_size=cli_args.dynamic_batch_size,
        atol=cli_args.atol,
        cache_dir=cli_args.cache_dir,
        trust_remote_code=cli_args.trust_remote_code,
        do_validation=not cli_args.disable_validation,
        submodels=submodels,
        **input_shapes,
    )
if __name__ == "__main__":
main()