#
# License: Apache 2.0
#
import os
from os import PathLike
import time
from pathlib import Path
from zipfile import ZipFile
from typing import Iterable, Any
import datasets
import evaluate
import numpy
import nncf
from nncf.parameters import ModelType
import openvino
import torch
from transformers import BertForSequenceClassification, BertTokenizer
# Define a routine to download a file
# Note: the file will be saved to the current working directory on the local filesystem by default. Set `directory` to change this behaviour. If `filename` is not given, the filename from the URL will be used.
#
# :param url: URL that points to the file to download
# :param filename: Name of the local file to save. Should point to the name of the file only,
# not the full path. If None the filename from the url will be used
# :param directory: Directory to save the file to. Will be created if it doesn't exist
# If None the file will be saved to the current working directory
# :param show_progress: If True, show a TQDM progress bar
# :param silent: If True, do not print a message if the file already exists
# :param timeout: Number of seconds before cancelling the connection attempt
# :return: path to downloaded file
#
# Source: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py
#
import urllib.parse
def download_file(
url: PathLike,
filename: PathLike = None,
directory: PathLike = None,
show_progress: bool = True,
silent: bool = False,
timeout: int = 10,
) -> PathLike:
from tqdm import tqdm
import requests
filename = filename or Path(urllib.parse.urlparse(url).path).name
chunk_size = 16384 # make chunks bigger so that not too many updates are triggered for Jupyter front-end
filename = Path(filename)
if len(filename.parts) > 1:
raise ValueError(
"`filename` should refer to the name of the file, excluding the directory. "
"Use the `directory` parameter to specify a target directory for the downloaded file."
)
# create the directory if it does not exist, and add the directory to the filename
if directory is not None:
directory = Path(directory)
directory.mkdir(parents=True, exist_ok=True)
filename = directory / Path(filename)
try:
        response = requests.get(url=url, headers={"User-agent": "Mozilla/5.0"}, stream=True, timeout=timeout)
response.raise_for_status()
except (
requests.exceptions.HTTPError
) as error: # For error associated with not-200 codes. It will output something like: "404 Client Error: Not Found for url: {url}"
raise Exception(error) from None
except requests.exceptions.Timeout:
raise Exception(
"Connection timed out. If you access the internet through a proxy server, please "
"make sure the proxy is set in the shell from where you launched Jupyter."
) from None
except requests.exceptions.RequestException as error:
raise Exception(f"File downloading failed with error: {error}") from None
# download the file, if it does not exist; or download it again if it exists but has an incorrect file size
filesize = int(response.headers.get("Content-length", 0))
if not filename.exists() or (os.stat(filename).st_size != filesize):
with tqdm(
total=filesize,
unit="B",
unit_scale=True,
unit_divisor=1024,
desc=str(filename),
disable=not show_progress,
) as progress_bar:
with open(filename, "wb") as file_object:
for chunk in response.iter_content(chunk_size):
file_object.write(chunk)
progress_bar.update(len(chunk))
progress_bar.refresh()
else:
if not silent:
print(f"'{filename}' already exists.")
response.close()
return filename.resolve()
# Set the data and model directories, source URL and the filename of the model.
MODEL_DIR = "model"
MODEL_LINK = "https://download.pytorch.org/tutorial/MRPC.zip"
FILE_NAME = MODEL_LINK.split("/")[-1]
PRETRAINED_MODEL_DIR = os.path.join(MODEL_DIR, "MRPC")
os.makedirs(MODEL_DIR, exist_ok=True)
# Download and unpack the pre-trained PyTorch BERT model for MRPC
download_file(MODEL_LINK, directory=MODEL_DIR, show_progress=True)
with ZipFile(f"{MODEL_DIR}/{FILE_NAME}", "r") as zip_ref:
zip_ref.extractall(MODEL_DIR)
# Remove the downloaded MRPC.zip archive after extraction
MODEL_PATH = os.path.join(MODEL_DIR, FILE_NAME)
if os.path.exists(MODEL_PATH):
    os.remove(MODEL_PATH)
else:
    print("MRPC.zip: the file does not exist.")
# Convert the original PyTorch model to the OpenVINO Intermediate Representation (OpenVINO IR)
# Starting with the OpenVINO 2023 releases, you can convert a model directly from the PyTorch format to the OpenVINO IR format using the model conversion API.
# The following PyTorch model formats are supported (a commented sketch for scripted models follows the FP32 conversion below):
# - `torch.nn.Module`
# - `torch.jit.ScriptModule`
# - `torch.jit.ScriptFunction`
MAX_SEQ_LENGTH = 128
input_shape = openvino.PartialShape([1, -1])
ir_model_xml = Path(MODEL_DIR) / "bert_mrpc.xml"
core = openvino.Core()
torch_model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_DIR)
torch_model.eval()
input_info = [
("input_ids", input_shape, numpy.int64),
("attention_mask", input_shape, numpy.int64),
("token_type_ids", input_shape, numpy.int64),
]
default_input = torch.ones(1, MAX_SEQ_LENGTH, dtype=torch.int64)
inputs = {
"input_ids": default_input,
"attention_mask": default_input,
"token_type_ids": default_input,
}
# Convert the PyTorch model to OpenVINO IR FP32.
if not ir_model_xml.exists():
model = openvino.convert_model(torch_model, example_input=inputs, input=input_info)
openvino.save_model(model, str(ir_model_xml))
else:
model = core.read_model(ir_model_xml)
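# For illustration only (not executed here, and assuming the model is scriptable):
# a `torch.jit.ScriptModule` from the list above could be converted through the same API, e.g.
#   scripted_model = torch.jit.script(torch_model)
#   ov_scripted = openvino.convert_model(scripted_model, example_input=inputs, input=input_info)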
# Prepare the dataset: download the README and data, generate the train/validation/test splits, and map the tokenizer over the examples.
def create_data_source():
raw_dataset = datasets.load_dataset("glue", "mrpc", split="validation")
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_DIR)
def _preprocess_fn(examples):
texts = (examples["sentence1"], examples["sentence2"])
result = tokenizer(*texts, padding="max_length", max_length=MAX_SEQ_LENGTH, truncation=True)
result["labels"] = examples["label"]
return result
processed_dataset = raw_dataset.map(_preprocess_fn, batched=True, batch_size=1)
return processed_dataset
data_source = create_data_source()
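# Each processed item is expected to keep the original columns ("sentence1", "sentence2", "label", "idx")
# and add the tokenized fields used below ("input_ids", "token_type_ids", "attention_mask") plus "labels".
# An optional sanity check:
#   print(sorted(data_source[0].keys()))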
# Optimize the model using the NNCF Post-training Quantization API
INPUT_NAMES = list(inputs.keys())
# Create the data transformation function
def transform_fn(data_item):
"""
Extract the model's input from the data item. data_item is returned from the data source per iteration.
This function should be passed when the data item cannot be used as model's input.
"""
inputs = {name: numpy.asarray([data_item[name]], dtype=numpy.int64) for name in INPUT_NAMES}
return inputs
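# With padding="max_length" in the tokenizer above, each array returned by transform_fn has shape (1, MAX_SEQ_LENGTH).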
# Create a Dataset for quantization
# Create an instance of the nncf.Dataset class by passing two parameters:
# - data_source: an iterable Python object that contains data items for model calibration;
# - transform_fn: the data transformation function created earlier.
calibration_dataset = nncf.Dataset(data_source, transform_fn)
# Quantize the model by running nncf.quantize() to get the optimized model.
# Specifying model_type enables additional transformer-specific quantization patterns in the model.
quantized_model = nncf.quantize(model, calibration_dataset, model_type=ModelType.TRANSFORMER)
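# Note (an assumption about the NNCF API, not used in this run): nncf.quantize also exposes tuning knobs
# such as subset_size to control how many calibration samples are taken from the dataset, e.g.
#   nncf.quantize(model, calibration_dataset, model_type=ModelType.TRANSFORMER, subset_size=300)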
# Serialize the OpenVINO IR model using the `openvino.save_model` function
compressed_model_xml = Path(MODEL_DIR) / "quantized_bert_mrpc.xml"
openvino.save_model(quantized_model, compressed_model_xml)
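# openvino.save_model also writes the weights to a matching .bin file next to the .xml.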
# Load and Test OpenVINO Model
compiled_quantized_model = core.compile_model(model=quantized_model, device_name="CPU")
output_layer = compiled_quantized_model.outputs[0]
# From data_source, randomly pick a pair of sentences (indicated by the index `sample_idx`); inference compares the two sentences and outputs whether they have the same meaning.
from random import randrange
sample_idx = randrange(len(data_source))  # the MRPC validation split has 408 samples
sample = data_source[sample_idx]
inputs = {k: torch.unsqueeze(torch.tensor(sample[k]), 0) for k in ["input_ids", "token_type_ids", "attention_mask"]}
result = compiled_quantized_model(inputs)[output_layer]
result = numpy.argmax(result)
print(f"Sentence 1: {sample['sentence1']}")
print(f"Sentence 2: {sample['sentence2']}")
print(f"Have the same meaning? {'Yes' if result == 1 else 'No'}")
# Compare F1-score of FP32 and INT8 models
def validate(model: openvino.Model, dataset: Iterable[Any]) -> float:
"""
Evaluate the model on GLUE dataset.
Returns F1 score metric.
"""
compiled_model = core.compile_model(model, device_name="CPU")
output_layer = compiled_model.output(0)
metric = evaluate.load("glue", "mrpc")
for batch in dataset:
inputs = [numpy.expand_dims(numpy.asarray(batch[key], dtype=numpy.int64), 0) for key in INPUT_NAMES]
outputs = compiled_model(inputs)[output_layer]
predictions = outputs[0].argmax(axis=-1)
metric.add_batch(predictions=[predictions], references=[batch["labels"]])
metrics = metric.compute()
f1_score = metrics["f1"]
return f1_score
print("Checking the accuracy of the original model:")
metric = validate(model, data_source)
print(f"F1 score: {metric:.4f}")
print("Checking the accuracy of the quantized model:")
metric = validate(quantized_model, data_source)
print(f"F1 score: {metric:.4f}")
# Compare the original PyTorch model with the converted (FP32) and quantized (INT8) OpenVINO models to see the difference in performance.
# Measurements are expressed in SPS (sentences per second), which is the equivalent of frames per second (FPS) for images.
# Compile the model for CPU device.
compiled_model = core.compile_model(model=model, device_name="CPU")
num_samples = 50
sample = data_source[0]
inputs = {k: torch.unsqueeze(torch.tensor(sample[k]), 0) for k in ["input_ids", "token_type_ids", "attention_mask"]}
with torch.no_grad():
start = time.perf_counter()
for _ in range(num_samples):
torch_model(torch.vstack(list(inputs.values())))
end = time.perf_counter()
time_torch = end - start
print(f"PyTorch model (original) on CPU: {time_torch / num_samples:.3f} seconds per sentence, " f"SPS: {num_samples / time_torch:.2f}")
start = time.perf_counter()
for _ in range(num_samples):
compiled_model(inputs)
end = time.perf_counter()
time_ir = end - start
print(f"IR FP32 model (converted) in OpenVINO Runtime/CPU: {time_ir / num_samples:.3f} " f"seconds per sentence, SPS: {num_samples / time_ir:.2f}")
start = time.perf_counter()
for _ in range(num_samples):
compiled_quantized_model(inputs)
end = time.perf_counter()
time_ir = end - start
print(f"OpenVINO IR INT8 model (quantized) in OpenVINO Runtime/CPU: {time_ir / num_samples:.3f} " f"seconds per sentence, SPS: {num_samples / time_ir:.2f}")
print("You are now ready to evaluate the results using benchmark_app.")