amend doc details (#144)
zhenglongjiepheonix authored May 31, 2022
1 parent 4486ab6 commit b107203
Showing 3 changed files with 18 additions and 10 deletions.
md_doc/inference_with_ncnn.md (9 changes: 4 additions & 5 deletions)
@@ -29,21 +29,21 @@ model_path = '/models/shufflenet-v2-sim.onnx' # onnx simplified model
 data_path = '/data/ImageNet/calibration' # calibration data folder
 EXECUTING_DEVICE = 'cuda'
 
-# initialize dataloader
+# initialize dataloader, assuming the preprocessed calibration data is stored in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
 dataloader = [torch.from_numpy(npy_tensor) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.NCNN_INT8
-setting = QuantizationSettingFactory.academic_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.ncnn_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # initialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
@@ -61,8 +61,7 @@ quantizer.quantize(
 # export quantization param file and model file
 export_ppq_graph(graph=ppq_graph_ir, platform=TargetPlatform.NCNN_INT8, graph_save_to='shufflenet-v2-sim-ppq', config_save_to='shufflenet-v2-sim-ppq.table')
 ```
-note that your dataloader should provide batch data which is in the same shape of the input of simplified model, because
-simplified model can't take dynamic-shape inputs.
+note that your dataloader should provide batch data whose shape matches the input of the simplified model, because a simplified model can't take dynamic-shape inputs.
 
 ## Convert Your Model
 if you have compiled ncnn correctly, there should be executables in the installation binary folder which can convert onnx model
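
The amended note makes the fixed-shape requirement explicit. A quick pre-flight check can catch calibration files that do not match the model input before quantization starts; the sketch below is not part of the commit, it reuses the tutorial's paths and INPUT_SHAPE as illustrative assumptions, and the shape inspection relies only on the standard onnx API.

```python
# sanity-check sketch: verify calibration files against the model's fixed input shape;
# paths and INPUT_SHAPE mirror the tutorial above and are illustrative assumptions
import os

import numpy as np
import onnx

model_path = '/models/shufflenet-v2-sim.onnx'
data_path = '/data/ImageNet/calibration'
INPUT_SHAPE = [1, 3, 224, 224]

# read the input shape recorded in the simplified onnx model
model = onnx.load(model_path)
dims = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
assert dims == INPUT_SHAPE, f'model expects {dims}, dataloader provides {INPUT_SHAPE}'

# every binary calibration file must hold exactly one batch of that shape
numel = int(np.prod(INPUT_SHAPE))
for file_name in os.listdir(data_path):
    sample = np.fromfile(os.path.join(data_path, file_name), dtype=np.float32)
    assert sample.size == numel, f'{file_name}: {sample.size} values, expected {numel}'
```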
md_doc/inference_with_ppl_cuda.md (8 changes: 4 additions & 4 deletions)
@@ -1,7 +1,7 @@
 # Inference with PPL CUDA
 this tutorial gives you a simple illustration of how you can use PPQ to quantize your model and export
 a quantization parameter file for inference with ppl cuda as your backend. Similar to [inference_with_ncnn](./inference_with_ncnn.md), we use an onnx model, shufflenet-v2, as an example here to illustrate the whole process
-going from ready-to-quantize model to ready-to-deploy model and corresponding quantization parameter
+going from ready-to-quantize model to ready-to-deploy polished onnx model, with a quantization parameter file generated
 
 ## Quantize Your Network
 as we have specified in [how_to_use](./how_to_use.md), we should prepare our calibration dataloader, confirm
@@ -21,21 +21,21 @@ model_path = '/models/shufflenet-v2.onnx'
 data_path = '/data/ImageNet/calibration'
 EXECUTING_DEVICE = 'cuda'
 
-# initialize dataloader
+# initialize dataloader, assuming the preprocessed input data is stored in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
 dataloader = [torch.from_numpy(npy_tensor) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.PPL_CUDA_INT8
-setting = QuantizationSettingFactory.pplcuda_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.pplcuda_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # initialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
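
Both tutorials assume the calibration data has already been preprocessed into raw binary files. If your calibration set is plain image files instead, an equivalent fixed-shape dataloader can be built with standard torchvision preprocessing. This is a sketch under that assumption, not part of the commit, and the resize/crop pipeline shown is only one common ImageNet-style choice.

```python
# alternative dataloader sketch: build fixed-shape [1, 3, 224, 224] batches
# directly from image files instead of preprocessed binaries (illustrative only)
import os

import torch
from PIL import Image
from torchvision import transforms

data_path = '/data/ImageNet/calibration'  # illustrative path from the tutorial
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),  # float32 tensor of shape [3, 224, 224]
])

# each element is one calibration batch, matching INPUT_SHAPE in the tutorial
dataloader = [
    preprocess(Image.open(os.path.join(data_path, f)).convert('RGB')).unsqueeze(0)
    for f in sorted(os.listdir(data_path))
]
```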
ppq/parser/ncnn_exporter.py (11 changes: 10 additions & 1 deletion)
@@ -1,3 +1,4 @@
+import os
 from typing import List
 
 from ppq.core import (DataType, NetworkFramework, QuantizationProperty,
@@ -46,9 +47,17 @@ def export(self, file_path: str, graph: BaseGraph, config_path: str = None, inpu
         if config_path is not None:
             self.export_quantization_config(config_path, graph)
 
+        _, ext = os.path.splitext(file_path)
+        if ext == '.onnx':
+            exporter = OnnxExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None)
+        elif ext in {'.prototxt', '.caffemodel'}:
+            exporter = CaffeExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
+
         # no pre-determined export format, we export according to the
         # original model format
-        if graph._built_from == NetworkFramework.CAFFE:
+        elif graph._built_from == NetworkFramework.CAFFE:
             exporter = CaffeExporter()
             exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
 
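
After this change the exporter dispatches on the extension of file_path first and only falls back to the framework the graph was built from. Below is a sketch, not part of the commit, of what that means for the export_ppq_graph call shown in the ncnn tutorial; it assumes graph_save_to is forwarded to the exporter's file_path, and the import locations are likewise assumptions.

```python
# dispatch sketch (illustrative): how the amended NCNNExporter chooses an
# output format, assuming graph_save_to reaches file_path unchanged
from ppq.api import export_ppq_graph  # assumed import location
from ppq.core import TargetPlatform

# ppq_graph_ir: the quantized graph produced by the tutorial steps above

# '.onnx' extension: routed to OnnxExporter even for a caffe-built graph
export_ppq_graph(graph=ppq_graph_ir, platform=TargetPlatform.NCNN_INT8,
                 graph_save_to='shufflenet-v2-sim-ppq.onnx',
                 config_save_to='shufflenet-v2-sim-ppq.table')

# no recognized extension: falls back to graph._built_from, so a graph
# loaded from caffe is exported through CaffeExporter
export_ppq_graph(graph=ppq_graph_ir, platform=TargetPlatform.NCNN_INT8,
                 graph_save_to='shufflenet-v2-sim-ppq',
                 config_save_to='shufflenet-v2-sim-ppq.table')
```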
