From e82ea7208a8df094ddd46a205606e17dfb11c84e Mon Sep 17 00:00:00 2001 From: zoooo0820 Date: Thu, 27 Jul 2023 11:26:59 +0000 Subject: [PATCH 1/2] remove old dataloader & generator from quantilization --- .../post_training_quantization.py | 55 ++++--------------- .../test_post_training_quantization_while.py | 48 ++++++++++------ 2 files changed, 43 insertions(+), 60 deletions(-) diff --git a/python/paddle/static/quantization/post_training_quantization.py b/python/paddle/static/quantization/post_training_quantization.py index 266c1756a334ba..61f1cdf6fe5ffa 100644 --- a/python/paddle/static/quantization/post_training_quantization.py +++ b/python/paddle/static/quantization/post_training_quantization.py @@ -23,12 +23,10 @@ except: from .utils import tqdm -from inspect import isgeneratorfunction from paddle.fluid.framework import IrGraph, _get_var from ... import io, static -from ...fluid import reader from ...framework import core from ...utils import unique_name from ..log_helper import get_logger @@ -171,16 +169,16 @@ def __init__( When all parameters were saved in a single binary file, set it as the real filename. If parameters were saved in separate files, set it as 'None'. Default is 'None'. - batch_generator(Python Generator): The batch generator provides + batch_generator(Python Generator, deprecated): The batch generator provides calibrate data for DataLoader, and it returns a batch every time. Note that, sample_generator and batch_generator, only one should be set. Beisdes, batch_generator supports lod tensor. - sample_generator(Python Generator): The sample generator provides + sample_generator(Python Generator, deprecated): The sample generator provides calibrate data for DataLoader, and it only returns a sample every time. Note that, sample_generator and batch_generator, only one should be set. Beisdes, sample_generator dose not support lod tensor. 
- data_loader(Python Generator, Paddle.io.DataLoader, optional): The - Generator or Dataloader provides calibrate data, and it could + data_loader(paddle.io.DataLoader): The + DataLoader provides calibration data, and it could return a batch every time. batch_size(int, optional): The batch size of DataLoader. Default is 10. batch_nums(int, optional): If batch_nums is not None, the number of @@ -309,22 +307,12 @@ def __init__( # Check inputs assert executor is not None, "The executor cannot be None." - assert any( - [gen is not None] - for gen in [sample_generator, batch_generator, data_loader] - ), ( - "The sample_generator, batch_generator " - "and data_loader cannot be None in the same time." - ) - if data_loader is not None: - assert isinstance( - data_loader, - ( - io.DataLoader, - type(isgeneratorfunction), - reader.GeneratorLoader, - ), - ), "data_loader only accepts `paddle.io.DataLoader` or Generator instance." + assert data_loader is not None, "data_loader cannot be None." + + assert isinstance( + data_loader, io.DataLoader + ), "data_loader only accepts `paddle.io.DataLoader`." + assert batch_size > 0, "The batch_size should be greater than 0." 
assert ( algo in self._support_algo_type @@ -615,29 +603,8 @@ def _load_model_data(self): for var_name in self._feed_list ] - if self._data_loader is not None: - self._batch_nums = ( - self._batch_nums if self._batch_nums else len(self._data_loader) - ) - return - self._data_loader = reader.DataLoader.from_generator( - feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True - ) - if self._sample_generator is not None: - self._data_loader.set_sample_generator( - self._sample_generator, - batch_size=self._batch_size, - drop_last=True, - places=self._place, - ) - elif self._batch_generator is not None: - self._data_loader.set_batch_generator( - self._batch_generator, places=self._place - ) self._batch_nums = ( - self._batch_nums - if self._batch_nums - else len(list(self._data_loader)) + self._batch_nums if self._batch_nums else len(self._data_loader) ) def _optimize_fp32_model(self): diff --git a/test/quantization/test_post_training_quantization_while.py b/test/quantization/test_post_training_quantization_while.py index ab80930586c100..378e89792994e2 100644 --- a/test/quantization/test_post_training_quantization_while.py +++ b/test/quantization/test_post_training_quantization_while.py @@ -29,6 +29,23 @@ np.random.seed(0) +class TransedMnistDataSet(paddle.io.Dataset): + def __init__(self, mnist_data): + self.mnist_data = mnist_data + + def __getitem__(self, idx): + img = ( + np.array(self.mnist_data[idx][0]) + .astype('float32') + .reshape(1, 28, 28) + ) + batch = img / 127.5 - 1.0 + return {"x": batch} + + def __len__(self): + return len(self.mnist_data) + + class TestPostTrainingQuantization(unittest.TestCase): def setUp(self): self.download_path = 'int8/download' @@ -132,28 +149,30 @@ def generate_quantized_model( is_optimize_model=False, batch_size=10, batch_nums=10, - is_data_loader=False, ): place = paddle.CPUPlace() exe = paddle.static.Executor(place) - val_reader = paddle.dataset.mnist.train() - - def val_data_generator(): - batches = [] - for data 
in val_reader(): - batches.append(data[0].reshape(1, 28, 28)) - if len(batches) == batch_size: - batches = np.asarray(batches) - yield {"x": batches} - batches = [] + # val_reader = paddle.dataset.mnist.train() + train_dataset = paddle.vision.datasets.MNIST( + mode='train', transform=None + ) + train_dataset = TransedMnistDataSet(train_dataset) + BatchSampler = paddle.io.BatchSampler( + train_dataset, batch_size=batch_size + ) + val_data_generator = paddle.io.DataLoader( + train_dataset, + batch_sampler=BatchSampler, + places=paddle.static.cpu_places(), + ) ptq = PostTrainingQuantization( executor=exe, model_dir=model_path, model_filename='model.pdmodel', params_filename='model.pdiparams', - sample_generator=val_reader if not is_data_loader else None, - data_loader=val_data_generator if is_data_loader else None, + sample_generator=None, + data_loader=val_data_generator, batch_size=batch_size, batch_nums=batch_nums, algo=algo, @@ -183,7 +202,6 @@ def run_test( batch_size=10, infer_iterations=10, quant_iterations=5, - is_data_loader=False, ): origin_model_path = self.download_model(data_url, data_md5, model_name) @@ -210,7 +228,6 @@ def run_test( is_optimize_model, batch_size, quant_iterations, - is_data_loader=is_data_loader, ) print( @@ -442,7 +459,6 @@ def test_post_training_abs_max(self): batch_size, infer_iterations, quant_iterations, - is_data_loader=True, ) From 17eb773265072fc33f14a98c2d3d5dd0d88fa458 Mon Sep 17 00:00:00 2001 From: zoooo0820 Date: Thu, 27 Jul 2023 12:50:29 +0000 Subject: [PATCH 2/2] fix ut test_post_training_quantization_mnist --- .../test_post_training_quantization_mnist.py | 34 +++++++++++++++++-- .../test_post_training_quantization_while.py | 2 +- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/test/quantization/test_post_training_quantization_mnist.py b/test/quantization/test_post_training_quantization_mnist.py index 51ccda62dff6cd..cef001a8a60ef1 100644 --- a/test/quantization/test_post_training_quantization_mnist.py +++ 
b/test/quantization/test_post_training_quantization_mnist.py @@ -30,6 +30,23 @@ np.random.seed(0) +class TransedMnistDataSet(paddle.io.Dataset): + def __init__(self, mnist_data): + self.mnist_data = mnist_data + + def __getitem__(self, idx): + img = ( + np.array(self.mnist_data[idx][0]) + .astype('float32') + .reshape(1, 28, 28) + ) + batch = img / 127.5 - 1.0 + return {"img": batch} + + def __len__(self): + return len(self.mnist_data) + + class TestPostTrainingQuantization(unittest.TestCase): def setUp(self): self.root_path = tempfile.TemporaryDirectory() @@ -217,14 +234,27 @@ def generate_quantized_model( ): place = paddle.CPUPlace() exe = paddle.static.Executor(place) - val_reader = paddle.dataset.mnist.train() + + train_dataset = paddle.vision.datasets.MNIST( + mode='train', transform=None + ) + train_dataset = TransedMnistDataSet(train_dataset) + BatchSampler = paddle.io.BatchSampler( + train_dataset, batch_size=batch_size + ) + val_data_generator = paddle.io.DataLoader( + train_dataset, + batch_sampler=BatchSampler, + places=paddle.static.cpu_places(), + ) ptq = PostTrainingQuantization( executor=exe, model_dir=model_path, model_filename=model_filename, params_filename=params_filename, - sample_generator=val_reader, + sample_generator=None, + data_loader=val_data_generator, batch_size=batch_size, batch_nums=batch_nums, algo=algo, diff --git a/test/quantization/test_post_training_quantization_while.py b/test/quantization/test_post_training_quantization_while.py index 378e89792994e2..d515fc59cd4f91 100644 --- a/test/quantization/test_post_training_quantization_while.py +++ b/test/quantization/test_post_training_quantization_while.py @@ -152,7 +152,7 @@ def generate_quantized_model( ): place = paddle.CPUPlace() exe = paddle.static.Executor(place) - # val_reader = paddle.dataset.mnist.train() + train_dataset = paddle.vision.datasets.MNIST( mode='train', transform=None )