diff --git a/docs/benchmarks/image_classification/get-resnet50-data.md b/docs/benchmarks/image_classification/get-resnet50-data.md new file mode 100644 index 000000000..d1f83ae9a --- /dev/null +++ b/docs/benchmarks/image_classification/get-resnet50-data.md @@ -0,0 +1,43 @@ +# Image Classification using ResNet50 + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + ResNet50 validation run uses the Imagenet 2012 validation dataset consisting of 50,000 images. + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,imagenet,validation -j + ``` +=== "Calibration" + ResNet50 calibration dataset consist of 500 images selected from the Imagenet 2012 validation dataset. There are 2 alternative options for the calibration dataset. + + ### Get Calibration Dataset Using Option 1 + ``` + cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option1 -j + ``` + ### Get Calibration Dataset Using Option 2 + ``` + cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option2 -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf ResNet50 Model + +=== "Tensorflow" + + ### Tensorflow + ``` + cm run script --tags=get,ml-model,resnet50,_tensorflow -j + ``` +=== "Onnx" + + ### Onnx + ``` + cm run script --tags=get,ml-model,resnet50,_onnx -j + ``` + diff --git a/docs/benchmarks/image_classification/mobilenets.md b/docs/benchmarks/image_classification/mobilenets.md new file mode 100644 index 000000000..1907c2a41 --- /dev/null +++ b/docs/benchmarks/image_classification/mobilenets.md @@ -0,0 +1,59 @@ +# Image Classification using Mobilenet models + +Mobilenet models are not official MLPerf models and so cannot be used for a Closed division MLPerf inference submission. But since they can be run with Imagenet dataset, we are allowed to use them for Open division submission. Only CPU runs are supported now. 
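+
+The run commands below should resolve the Imagenet 2012 validation dataset automatically. If you prefer to stage the dataset ahead of time, the script tags shown on the ResNet50 dataset page apply here as well, since the same Imagenet validation set is used:
+
+```bash
+cm run script --tags=get,dataset,imagenet,validation -j
+```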
+ +## TFLite Backend + +=== "Mobilenet-V1" + ### Mobilenet V1 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v1 --adr.compiler.tags=gcc + ``` +=== "Mobilenet-V2" + ### Mobilenet V2 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v2 --adr.compiler.tags=gcc + ``` +=== "Mobilenet-V2" + ### Mobilenet V2 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v2 --adr.compiler.tags=gcc + ``` +=== "Mobilenets" + ### Mobilenet V1,V2,V3 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_mobilenet --adr.compiler.tags=gcc + ``` +=== "Efficientnet" + ### Efficientnet + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_efficientnet --adr.compiler.tags=gcc + ``` + +## ARMNN Backend +=== "Mobilenet-V1" + ### Mobilenet V1 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v1 --adr.compiler.tags=gcc + ``` +=== "Mobilenet-V2" + ### Mobilenet V2 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 --adr.compiler.tags=gcc + ``` +=== "Mobilenet-V2" + ### Mobilenet V2 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 --adr.compiler.tags=gcc + ``` +=== "Mobilenets" + ### Mobilenet V1,V2,V3 + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet --adr.compiler.tags=gcc + ``` +=== "Efficientnet" + ### Efficientnet + ```bash + cm run script --tags=run,mobilenet-models,_tflite,_armnn,_efficientnet --adr.compiler.tags=gcc + ``` + diff --git a/docs/benchmarks/image_classification/resnet50.md b/docs/benchmarks/image_classification/resnet50.md index 1a77db65a..26875258d 100644 --- a/docs/benchmarks/image_classification/resnet50.md +++ b/docs/benchmarks/image_classification/resnet50.md @@ -1,68 +1,26 @@ # Image Classification using ResNet50 -## Dataset - -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - ResNet50 validation run uses the Imagenet 2012 validation dataset consisting of 50,000 images. - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,imagenet,validation -j - ``` -=== "Calibration" - ResNet50 calibration dataset consist of 500 images selected from the Imagenet 2012 validation dataset. There are 2 alternative options for the calibration dataset. - - ### Get Calibration Dataset Using Option 1 - ``` - cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option1 -j - ``` - ### Get Calibration Dataset Using Option 2 - ``` - cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option2 -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
- -Get the Official MLPerf ResNet50 Model - -=== "Tensorflow" - - ### Tensorflow - ``` - cm run script --tags=get,ml-model,resnet50,_tensorflow -j - ``` -=== "Onnx" - - ### Onnx - ``` - cm run script --tags=get,ml-model,resnet50,_onnx -j - ``` - -## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python + ## MLPerf Reference Implementation in Python {{ mlperf_inference_implementation_readme (4, "resnet50", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "resnet50", "nvidia") }} === "Intel" - ### Intel MLPerf Implementation + ## Intel MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "resnet50", "intel") }} === "Qualcomm" - ### Qualcomm AI100 MLPerf Implementation + ## Qualcomm AI100 MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "resnet50", "qualcomm") }} -=== "MLCommon-C++" - ### MLPerf Modular Implementation in C++ +=== "MLCommons-C++" + ## MLPerf Modular Implementation in C++ {{ mlperf_inference_implementation_readme (4, "resnet50", "cpp") }} diff --git a/docs/benchmarks/language/bert.md b/docs/benchmarks/language/bert.md index e2aa0995d..782340b3d 100644 --- a/docs/benchmarks/language/bert.md +++ b/docs/benchmarks/language/bert.md @@ -1,44 +1,7 @@ # Question Answering using Bert-Large -## Dataset - -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - BERT validation run uses the SQuAD v1.1 dataset. - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,squad,validation -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
- -Get the Official MLPerf Bert-Large Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,bert-large,_pytorch -j - ``` -=== "Onnx" - - ### Onnx - ``` - cm run script --tags=get,ml-model,bert-large,_onnx -j - ``` -=== "Tensorflow" - - ### Tensorflow - ``` - cm run script --tags=get,ml-model,bert-large,_tensorflow -j - ``` - -## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python + ## MLPerf Reference Implementation in Python BERT-99 {{ mlperf_inference_implementation_readme (4, "bert-99", "reference") }} @@ -47,7 +10,7 @@ Get the Official MLPerf Bert-Large Model {{ mlperf_inference_implementation_readme (4, "bert-99.9", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation BERT-99 {{ mlperf_inference_implementation_readme (4, "bert-99", "nvidia") }} @@ -56,7 +19,7 @@ Get the Official MLPerf Bert-Large Model {{ mlperf_inference_implementation_readme (4, "bert-99.9", "nvidia") }} === "Intel" - ### Intel MLPerf Implementation + ## Intel MLPerf Implementation BERT-99 {{ mlperf_inference_implementation_readme (4, "bert-99", "intel") }} @@ -64,7 +27,7 @@ Get the Official MLPerf Bert-Large Model {{ mlperf_inference_implementation_readme (4, "bert-99.9", "intel") }} === "Qualcomm" - ### Qualcomm AI100 MLPerf Implementation + ## Qualcomm AI100 MLPerf Implementation BERT-99 {{ mlperf_inference_implementation_readme (4, "bert-99", "qualcomm") }} diff --git a/docs/benchmarks/language/get-bert-data.md b/docs/benchmarks/language/get-bert-data.md new file mode 100644 index 000000000..f5462b181 --- /dev/null +++ b/docs/benchmarks/language/get-bert-data.md @@ -0,0 +1,38 @@ +# Question Answering using Bert-Large + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + BERT validation run uses the SQuAD v1.1 dataset. + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,squad,validation -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf Bert-Large Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,bert-large,_pytorch -j + ``` +=== "Onnx" + + ### Onnx + ``` + cm run script --tags=get,ml-model,bert-large,_onnx -j + ``` +=== "Tensorflow" + + ### Tensorflow + ``` + cm run script --tags=get,ml-model,bert-large,_tensorflow -j + ``` + diff --git a/docs/benchmarks/language/get-gptj-data.md b/docs/benchmarks/language/get-gptj-data.md new file mode 100644 index 000000000..9ea31feb4 --- /dev/null +++ b/docs/benchmarks/language/get-gptj-data.md @@ -0,0 +1,25 @@ +# Text Summarization using GPT-J + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + GPT-J validation run uses the CNNDM dataset. + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,cnndm,validation -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. 
In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf GPT-J Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,gptj,_pytorch -j + ``` diff --git a/docs/benchmarks/language/get-llama2-70b-data.md b/docs/benchmarks/language/get-llama2-70b-data.md new file mode 100644 index 000000000..4b04f7068 --- /dev/null +++ b/docs/benchmarks/language/get-llama2-70b-data.md @@ -0,0 +1,26 @@ +# Text Summarization using LLAMA2-70b + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + LLAMA2-70b validation run uses the Open ORCA dataset. + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,openorca,validation -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf LLAMA2-70b Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,llama2-70b,_pytorch -j + ``` + diff --git a/docs/benchmarks/language/gpt-j.md b/docs/benchmarks/language/gpt-j.md index d1c351214..2eefbbc79 100644 --- a/docs/benchmarks/language/gpt-j.md +++ b/docs/benchmarks/language/gpt-j.md @@ -1,57 +1,39 @@ # Text Summarization using GPT-J -## Dataset -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - GPT-J validation run uses the CNNDM dataset. - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,cnndm,validation -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
- -Get the Official MLPerf GPT-J Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,gptj,_pytorch -j - ``` - -## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python + ## MLPerf Reference Implementation in Python GPT-J-99 + {{ mlperf_inference_implementation_readme (4, "gptj-99", "reference") }} GPTJ-99.9 + {{ mlperf_inference_implementation_readme (4, "gptj-99.9", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation GPTJ-99 + {{ mlperf_inference_implementation_readme (4, "gptj-99", "nvidia") }} GPTJ-99.9 + {{ mlperf_inference_implementation_readme (4, "gptj-99.9", "nvidia") }} === "Intel" - ### Intel MLPerf Implementation + ## Intel MLPerf Implementation GPTJ-99 + {{ mlperf_inference_implementation_readme (4, "gptj-99", "intel") }} === "Qualcomm" - ### Qualcomm AI100 MLPerf Implementation + ## Qualcomm AI100 MLPerf Implementation GPTJ-99 + {{ mlperf_inference_implementation_readme (4, "gptj-99", "qualcomm") }} diff --git a/docs/benchmarks/language/llama2-70b.md b/docs/benchmarks/language/llama2-70b.md index 7f8052aef..f1785dcb3 100644 --- a/docs/benchmarks/language/llama2-70b.md +++ b/docs/benchmarks/language/llama2-70b.md @@ -1,32 +1,8 @@ # Text Summarization using LLAMA2-70b -## Dataset -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - LLAMA2-70b validation run uses the Open ORCA dataset. - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,openorca,validation -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. - -Get the Official MLPerf LLAMA2-70b Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,llama2-70b,_pytorch -j - ``` - -## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python + ## MLPerf Reference Implementation in Python LLAMA2-70b-99 {{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "reference") }} @@ -35,7 +11,7 @@ Get the Official MLPerf LLAMA2-70b Model {{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation LLAMA2-70b-99 {{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "nvidia") }} @@ -45,7 +21,7 @@ Get the Official MLPerf LLAMA2-70b Model === "Qualcomm" - ### Qualcomm AI100 MLPerf Implementation + ## Qualcomm AI100 MLPerf Implementation LLAMA2-70b-99 {{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "qualcomm") }} diff --git a/docs/benchmarks/medical_imaging/3d-unet.md b/docs/benchmarks/medical_imaging/3d-unet.md index bd3ccae40..b58ea7f2e 100644 --- a/docs/benchmarks/medical_imaging/3d-unet.md +++ b/docs/benchmarks/medical_imaging/3d-unet.md @@ -1,60 +1,33 @@ # Medical Imaging using 3d-unet (KiTS 2019 kidney tumor segmentation task) -## Dataset -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. 
- -=== "Validation" - 3d-unet validation run uses the KiTS19 dataset performing [KiTS 2019](https://kits19.grand-challenge.org/) kidney tumor segmentation task - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,kits19,validation -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. - -Get the Official MLPerf 3d-unet Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,3d-unet,_pytorch -j - ``` -=== "Onnx" +=== "MLCommons-Python" + ## MLPerf Reference Implementation in Python - ### Onnx - ``` - cm run script --tags=get,ml-model,3d-unet,_onnx -j - ``` -=== "Tensorflow" + 3d-unet-99 - ### Tensorflow - ``` - cm run script --tags=get,ml-model,3d-unet,_tensorflow -j - ``` +{{ mlperf_inference_implementation_readme (4, "3d-unet-99", "reference") }} -## Benchmark Implementations -=== "MLCommons-Python" - ### MLPerf Reference Implementation in Python + 3d-unet-99.9 - 3d-unet-99.9 {{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation - 3d-unet-99 + ## Nvidia MLPerf Implementation + 3d-unet-99 + {{ mlperf_inference_implementation_readme (4, "3d-unet-99", "nvidia") }} - 3d-unet-99.9 + 3d-unet-99.9 + {{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "nvidia") }} === "Intel" - ### Intel MLPerf Implementation + ## Intel MLPerf Implementation 3d-unet-99 + {{ mlperf_inference_implementation_readme (4, "3d-unet-99", "intel") }} 3d-unet-99.9 + {{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "intel") }} diff --git a/docs/benchmarks/medical_imaging/get-3d-unet-data.md b/docs/benchmarks/medical_imaging/get-3d-unet-data.md new file mode 100644 index 000000000..efc6ce6ed --- /dev/null +++ b/docs/benchmarks/medical_imaging/get-3d-unet-data.md @@ -0,0 +1,38 @@ +# Medical Imaging using 3d-unet (KiTS 2019 kidney tumor segmentation task) + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + 3d-unet validation run uses the KiTS19 dataset performing [KiTS 2019](https://kits19.grand-challenge.org/) kidney tumor segmentation task + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,kits19,validation -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
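+
+The official download commands are listed below. As a side note (assuming the standard `cmind` cache commands), CM caches every download, so you can check whether a model artifact is already present, or remove it to force a fresh download:
+
+```
+cm show cache --tags=get,ml-model,3d-unet
+cm rm cache --tags=get,ml-model,3d-unet -f
+```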
+ +Get the Official MLPerf 3d-unet Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,3d-unet,_pytorch -j + ``` +=== "Onnx" + + ### Onnx + ``` + cm run script --tags=get,ml-model,3d-unet,_onnx -j + ``` +=== "Tensorflow" + + ### Tensorflow + ``` + cm run script --tags=get,ml-model,3d-unet,_tensorflow -j + ``` + diff --git a/docs/benchmarks/object_detection/get-retinanet-data.md b/docs/benchmarks/object_detection/get-retinanet-data.md new file mode 100644 index 000000000..f2d432210 --- /dev/null +++ b/docs/benchmarks/object_detection/get-retinanet-data.md @@ -0,0 +1,38 @@ +# Object Detection using Retinanet + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + Retinanet validation run uses the OpenImages v6 MLPerf validation dataset resized to 800x800 and consisting of 24,576 images. + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,openimages,_validation -j + ``` +=== "Calibration" + Retinanet calibration dataset consist of 500 images selected from the OpenImages v6 dataset. + + ``` + cm run script --tags=get,dataset,openimages,_calibration -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf Retinanet Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,retinanet,_pytorch -j + ``` +=== "Onnx" + + ### Onnx + ``` + cm run script --tags=get,ml-model,retinanet,_onnx -j + ``` + diff --git a/docs/benchmarks/object_detection/retinanet.md b/docs/benchmarks/object_detection/retinanet.md index f500f616d..383a2ec1b 100644 --- a/docs/benchmarks/object_detection/retinanet.md +++ b/docs/benchmarks/object_detection/retinanet.md @@ -1,63 +1,26 @@ # Object Detection using Retinanet -## Dataset - -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - Retinanet validation run uses the OpenImages v6 MLPerf validation dataset resized to 800x800 and consisting of 24,576 images. - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,openimages,_validation -j - ``` -=== "Calibration" - Retinanet calibration dataset consist of 500 images selected from the OpenImages v6 dataset. - - ``` - cm run script --tags=get,dataset,openimages,_calibration -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
- -Get the Official MLPerf Retinanet Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,retinanet,_pytorch -j - ``` -=== "Onnx" - - ### Onnx - ``` - cm run script --tags=get,ml-model,retinanet,_onnx -j - ``` - -## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python + ## MLPerf Reference Implementation in Python {{ mlperf_inference_implementation_readme (4, "retinanet", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "retinanet", "nvidia") }} === "Intel" - ### Intel MLPerf Implementation + ## Intel MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "retinanet", "intel") }} === "Qualcomm" - ### Qualcomm AI100 MLPerf Implementation + ## Qualcomm AI100 MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "retinanet", "qualcomm") }} -=== "MLCommon-C++" - ### MLPerf Modular Implementation in C++ +=== "MLCommons-C++" + ## MLPerf Modular Implementation in C++ {{ mlperf_inference_implementation_readme (4, "retinanet", "cpp") }} diff --git a/docs/benchmarks/recommendation/dlrm-v2.md b/docs/benchmarks/recommendation/dlrm-v2.md index 1294b008b..18266615f 100644 --- a/docs/benchmarks/recommendation/dlrm-v2.md +++ b/docs/benchmarks/recommendation/dlrm-v2.md @@ -1,36 +1,22 @@ # Recommendation using DLRM v2 -## Dataset - -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - DLRM validation run uses the Criteo dataset (Day 23). - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,criteo,validation -j - ``` -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. - -Get the Official MLPerf DLRM v2 Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,dlrm_v2,_pytorch -j - ``` ## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python - + ## MLPerf Reference Implementation in Python + + DLRM-v2-99 +{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99", "reference") }} + + DLRM-v2-99.9 {{ mlperf_inference_implementation_readme (4, "dlrm_v2-99.9", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation + DLRM-v2-99 +{{ mlperf_inference_implementation_readme (4, "dlrm_v2-99", "nvidia") }} + + DLRM-v2-99.9 {{ mlperf_inference_implementation_readme (4, "dlrm_v2-99.9", "nvidia") }} diff --git a/docs/benchmarks/recommendation/get-dlrm_v2-data.md b/docs/benchmarks/recommendation/get-dlrm_v2-data.md new file mode 100644 index 000000000..97464a164 --- /dev/null +++ b/docs/benchmarks/recommendation/get-dlrm_v2-data.md @@ -0,0 +1,25 @@ +# Recommendation using DLRM v2 + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + DLRM validation run uses the Criteo dataset (Day 23). 
+ + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,criteo,validation -j + ``` +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf DLRM v2 Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,dlrm_v2,_pytorch -j + ``` + diff --git a/docs/benchmarks/text_to_image/get-sdxl-data.md b/docs/benchmarks/text_to_image/get-sdxl-data.md new file mode 100644 index 000000000..830465d44 --- /dev/null +++ b/docs/benchmarks/text_to_image/get-sdxl-data.md @@ -0,0 +1,26 @@ +# Text to Image using Stable Diffusion + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Validation" + Stable Diffusion validation run uses the Coco 2014 dataset. + + ### Get Validation Dataset + ``` + cm run script --tags=get,dataset,coco2014,_validation -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf Stable Diffusion Model + +=== "Pytorch" + + ### Pytorch + ``` + cm run script --tags=get,ml-model,sdxl,_pytorch -j + ``` + diff --git a/docs/benchmarks/text_to_image/sdxl.md b/docs/benchmarks/text_to_image/sdxl.md index 2e9c95c66..2d84838d4 100644 --- a/docs/benchmarks/text_to_image/sdxl.md +++ b/docs/benchmarks/text_to_image/sdxl.md @@ -1,49 +1,23 @@ # Text to Image using Stable Diffusion -## Dataset -The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. - -=== "Validation" - Stable Diffusion validation run uses the Coco 2014 dataset. - - ### Get Validation Dataset - ``` - cm run script --tags=get,dataset,coco2014,_validation -j - ``` - -## Model -The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
- -Get the Official MLPerf Stable Diffusion Model - -=== "Pytorch" - - ### Pytorch - ``` - cm run script --tags=get,ml-model,sdxl,_pytorch -j - ``` - -## Benchmark Implementations === "MLCommons-Python" - ### MLPerf Reference Implementation in Python + ## MLPerf Reference Implementation in Python {{ mlperf_inference_implementation_readme (4, "sdxl", "reference") }} === "Nvidia" - ### Nvidia MLPerf Implementation + ## Nvidia MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia") }} === "Intel" - ### Intel MLPerf Implementation - GPTJ-99 + ## Intel MLPerf Implementation {{ mlperf_inference_implementation_readme (4, "sdxl", "intel") }} === "Qualcomm" - ### Qualcomm AI100 MLPerf Implementation + ## Qualcomm AI100 MLPerf Implementation - GPTJ-99 {{ mlperf_inference_implementation_readme (4, "sdxl", "qualcomm") }} diff --git a/docs/install/index.md b/docs/install/index.md index ec76718d9..10ed6e325 100644 --- a/docs/install/index.md +++ b/docs/install/index.md @@ -1,100 +1,7 @@ # Installation We use MLCommons CM Automation framework to run MLPerf inference benchmarks. -## CM Install - -We have successfully tested CM on - -* Ubuntu 18.x, 20.x, 22.x , 23.x, -* RedHat 8, RedHat 9, CentOS 8 -* macOS -* Wndows 10, Windows 11 - -=== "Ubuntu" - ### Ubuntu, Debian - - - ```bash - sudo apt update && sudo apt upgrade - sudo apt install python3 python3-pip python3-venv git wget curl - ``` - - **Note that you must set up virtual env on Ubuntu 23+ before using any Python project:** - ```bash - python3 -m venv cm - source cm/bin/activate - ``` - - You can now install CM via PIP: - - ```bash - python3 -m pip install cmind - ``` - - You might need to do the following command to update the `PATH` to include the BIN paths from pip installs - - ```bash - source $HOME/.profile - ``` - - You can check that CM is available by checking the `cm` command - - -=== "Red Hat" - ### Red Hat - - ```bash - sudo dnf update - sudo dnf install python3 python-pip git wget curl - python3 -m pip install cmind --user - ``` - -=== "macOS" - ### macOS - - *Note that CM currently does not work with Python installed from the Apple Store. - Please install Python via brew as described below.* - - If `brew` package manager is not installed, please install it as follows (see details [here](https://brew.sh/)): - ```bash - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - ``` - - Don't forget to add brew to PATH environment as described in the end of the installation output. - - Then install python, pip, git and wget: - - ```bash - brew install python3 git wget curl - python3 -m pip install cmind - ``` - -=== "Windows" - - ### Windows - * Configure Windows 10+ to [support long paths](https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry#enable-long-paths-in-windows-10-version-1607-and-later) from command line as admin: - - ```bash - reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v LongPathsEnabled /t REG_DWORD /d 1 /f - ``` - - * Download and install Git from [git-for-windows.github.io](https://git-for-windows.github.io). - * Configure Git to accept long file names: `git config --system core.longpaths true` - * Download and install Python 3+ from [www.python.org/downloads/windows](https://www.python.org/downloads/windows). - * Don't forget to select option to add Python binaries to PATH environment! - * Configure Windows to accept long fie names during Python installation! 
- - * Install CM via PIP: - - ```bash - python -m pip install cmind - ``` - - *Note that we [have reports](https://github.com/mlcommons/ck/issues/844) - that CM does not work when Python was first installed from the Microsoft Store. - If CM fails to run, you can find a fix [here](https://stackoverflow.com/questions/57485491/python-python3-executes-in-command-prompt-but-does-not-run-correctly)*. - -Please visit the [official CM installation page](https://github.com/mlcommons/ck/blob/master/docs/installation.md) for more details +Please follow the [official installation page](https://docs.mlcommons.org/ck/install) to install CM ## Download the CM MLOps Repository @@ -102,4 +9,5 @@ Please visit the [official CM installation page](https://github.com/mlcommons/ck cm pull repo gateoverflow@cm4mlops ``` + Now, you are ready to use the `cm` commands to run MLPerf inference as given in the [benchmarks](../benchmarks/index.md) page diff --git a/docs/submission/index.md b/docs/submission/index.md index b5ff53033..94287f5d2 100644 --- a/docs/submission/index.md +++ b/docs/submission/index.md @@ -1,4 +1,4 @@ -If you follow the `cm run` commands under the individual model pages in the [benchmarks](../benchmarks) directory, all the valid results will get aggregated to the `cm cache` folder. Once all the results across all the modelsare ready you can use the following command to generate a valid submission tree compliant with the [MLPerf requirements](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). +If you follow the `cm run` commands under the individual model pages in the [benchmarks](../benchmarks/index.md) directory, all the valid results will get aggregated to the `cm cache` folder. Once all the results across all the modelsare ready you can use the following command to generate a valid submission tree compliant with the [MLPerf requirements](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
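+
+For orientation, the generation step is a single CM script invocation along the lines of the sketch below. The tags and flags are assumed from the standard cm4mlops submission-generation script and are illustrative only; the full command with all supported options follows in the next section:
+
+```bash
+cm run script --tags=generate,inference,submission \
+   --submitter=MLCommons --division=open --category=edge --quiet
+```
+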
## Generate actual submission tree diff --git a/docs/submission/tools-readme.md b/docs/submission/tools-readme.md deleted file mode 120000 index d6f026eab..000000000 --- a/docs/submission/tools-readme.md +++ /dev/null @@ -1 +0,0 @@ -../../tools/submission/README.md \ No newline at end of file diff --git a/main.py b/main.py index a83315334..f73357d4e 100644 --- a/main.py +++ b/main.py @@ -1,136 +1,291 @@ def define_env(env): - @env.macro - def mlperf_inference_implementation_readme(spaces, model, implementation): - pre_space = "" - for i in range(1,spaces): - pre_space = pre_space + " " - f_pre_space = pre_space - pre_space += " " - - content="" - if implementation == "reference": - devices = [ "CPU", "CUDA", "ROCm" ] - if model.lower() == "resnet50": - frameworks = [ "Onnxruntime", "Tensorflow", "Deepsparse" ] - elif model.lower() == "retinanet": - frameworks = [ "Onnxruntime", "Pytorch" ] - elif "bert" in model.lower(): - frameworks = [ "Onnxruntime", "Pytorch", "Tensorflow" ] - else: - frameworks = [ "Pytorch" ] - elif implementation == "nvidia": - devices = [ "CUDA" ] - frameworks = [ "TensorRT" ] - elif implementation == "intel": - devices = [ "CPU" ] - frameworks = [ "Pytorch" ] - elif implementation == "qualcomm": - devices = [ "QAIC" ] - frameworks = [ "Glow" ] - elif implementation == "cpp": - devices = [ "CPU", "CUDA" ] - frameworks = [ "Onnxruntime" ] - - if model.lower() == "bert-99.9": - categories = [ "Datacenter" ] - elif "dlrm" in model.lower() or "llama2" in model.lower(): - categories = [ "Datacenter" ] - else: - categories = [ "Edge", "Datacenter" ] - - for category in categories: - if category == "Edge": - scenarios = [ "Offline", "SingleStream" ] - if model.lower() in [ "resnet50", "retinanet" ]: - scenarios.append("Multistream") - elif category == "Datacenter": - scenarios = [ "Offline", "Server" ] - - content += f"{pre_space}=== \"{category.lower()}\"\n\n" - - cur_space = pre_space + " " - scenarios_string = ", ".join(scenarios) - - content += f"{cur_space}#### {category} category \n\n{cur_space} In the {category.lower()} category, {model} has {scenarios_string} scenarios and all the scenarios are mandatory for a closed division submission.\n\n" - - - for framework in frameworks: - cur_space1 = cur_space + " " - content += f"{cur_space}=== \"{framework}\"\n" - content += f"{cur_space1}##### {framework} framework\n\n" - - for device in devices: - if framework.lower() == "deepsparse": - if device.lower() != "cpu": - continue - cur_space2 = cur_space1 + " " - content += f"{cur_space1}=== \"{device}\"\n" - content += f"{cur_space2}###### {device} device\n\n" - - content += f"{cur_space2}###### Docker Setup Command\n\n" - test_query_count=100 - content += mlperf_inference_run_command(spaces+12, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True) - content += f"{cur_space2}The above command should get you to an interactive shell inside the docker container and do a quick test run for the Offline scenario. Once inside the docker container please do the below commands to do the accuracy + performance runs for each scenario.\n\n" - content += f"{cur_space2}
\n" - content += f"{cur_space2} Please click here to see more options for the docker launch \n\n" - content += f"{cur_space2}* `--docker_cm_repo `: to use a custom fork of cm4mlops repository inside the docker image\n\n" - content += f"{cur_space2}* `--docker_cache=no`: to not use docker cache during the image build\n" - - if device.lower() not in [ "cuda" ]: - content += f"{cur_space2}* `--docker_os=ubuntu`: ubuntu and rhel are supported. \n" - content += f"{cur_space2}* `--docker_os_version=20.04`: [20.04, 22.04] are supported for Ubuntu and [8, 9] for RHEL\n" - - content += f"{cur_space2}
\n" - run_suffix = "" - run_suffix += f"\n{cur_space2} ###### Run Options\n\n" - run_suffix += f"{cur_space2} * Use `--division=closed` to do a closed division submission which includes compliance runs\n\n" - run_suffix += f"{cur_space2} * Use `--rerun` to do a rerun even when a valid run exists\n\n" - - for scenario in scenarios: - cur_space3 = cur_space2 + " " - content += f"{cur_space2}=== \"{scenario}\"\n{cur_space3}####### {scenario}\n" - run_cmd = mlperf_inference_run_command(spaces+16, model, implementation, framework.lower(), category.lower(), scenario, device.lower(), "valid") - content += run_cmd - content += run_suffix - - content += f"{cur_space2}=== \"All Scenarios\"\n{cur_space3}####### All Scenarios\n" - run_cmd = mlperf_inference_run_command(spaces+16, model, implementation, framework.lower(), category.lower(), "All Scenarios", device.lower(), "valid") - content += run_cmd - content += run_suffix - - return content - - - @env.macro - def mlperf_inference_run_command(spaces, model, implementation, framework, category, scenario, device="cpu", execution_mode="test", test_query_count="20", docker=False): - pre_space = "" - for i in range(1,spaces): - pre_space = pre_space + " " - f_pre_space = pre_space - pre_space += " " - - if scenario == "All Scenarios": - scenario_variation_tag = ",_all-scenarios" - scenario_option = "" - else: - scenario_variation_tag = "" - scenario_option = f"\\\n {pre_space} --scenario={scenario}" - - if docker: - docker_cmd_suffix = f" \\\n {pre_space} --docker --quiet" - docker_cmd_suffix += f" \\\n {pre_space} --test_query_count={test_query_count}" - - docker_setup_cmd = f"\n{f_pre_space} ```bash\n{f_pre_space} cm run script --tags=run-mlperf,inference,_find-performance,_full{scenario_variation_tag} \\\n {pre_space} --model={model} \\\n {pre_space} --implementation={implementation} \\\n {pre_space} --framework={framework} \\\n {pre_space} --category={category} {scenario_option} \\\n {pre_space} --execution-mode=test \\\n {pre_space} --device={device} {docker_cmd_suffix}\n{f_pre_space} ```\n" - - return docker_setup_cmd - - else: - cmd_suffix = f"\\\n {pre_space} --quiet" - - if execution_mode == "test": - cmd_suffix += f" \\\n {pre_space} --test_query_count={test_query_count}" - - run_cmd = f"\n{f_pre_space} ```bash\n{f_pre_space} cm run script --tags=run-mlperf,inference{scenario_variation_tag} \\\n {pre_space} --model={model} \\\n {pre_space} --implementation={implementation} \\\n {pre_space} --framework={framework} \\\n {pre_space} --category={category} {scenario_option} \\\n {pre_space} --execution-mode={execution_mode} \\\n {pre_space} --device={device} {cmd_suffix}\n{f_pre_space} ```\n" - - return run_cmd + @env.macro + def mlperf_inference_implementation_readme(spaces, model, implementation): + pre_space = "" + + for i in range(1,spaces): + pre_space = pre_space + " " + f_pre_space = pre_space + pre_space += " " + + content="" + scenarios = [] + execution_envs = ["Docker","Native"] + + if implementation == "reference": + devices = [ "CPU", "CUDA", "ROCm" ] + if model.lower() == "resnet50": + frameworks = [ "Onnxruntime", "Tensorflow", "Deepsparse" ] + elif model.lower() == "retinanet": + frameworks = [ "Onnxruntime", "Pytorch" ] + elif "bert" in model.lower(): + frameworks = [ "Onnxruntime", "Pytorch", "Tensorflow" ] + else: + frameworks = [ "Pytorch" ] + + elif implementation == "nvidia": + if model in [ "sdxl", "llama2-70b-99", "llama2-70b-99.9" ]: + return pre_space+" WIP" + devices = [ "CUDA" ] + frameworks = [ "TensorRT" ] + + elif 
implementation == "intel": + if model not in [ "bert-99", "bert-99.9", "gptj-99", "gptj-99.9" ]: + return pre_space+" WIP" + devices = [ "CPU" ] + frameworks = [ "Pytorch" ] + + elif implementation == "qualcomm": + if model not in [ "resnet50", "retinanet", "bert-99", "bert-99.9" ]: + return pre_space+" WIP" + + devices = [ "QAIC" ] + frameworks = [ "Glow" ] + + elif implementation == "cpp": + devices = [ "CPU", "CUDA" ] + frameworks = [ "Onnxruntime" ] + + elif implementation == "ctuning-cpp": + scenarios = [ "SingleStream" ] + devices = [ "CPU" ] + if model.lower() == "resnet50": + frameworks = [ "TFLite" ] + else: + frameworks = [] + + if model.lower() == "bert-99.9": + categories = [ "Datacenter" ] + elif "dlrm" in model.lower() or "llama2" in model.lower(): + categories = [ "Datacenter" ] + else: + categories = [ "Edge", "Datacenter" ] + + for category in categories: + if category == "Edge" and not scenarios: + scenarios = [ "Offline", "SingleStream" ] + if model.lower() in [ "resnet50", "retinanet" ] and not "MultiStream" in scenarios:#MultiStream was duplicating + scenarios.append("MultiStream") + elif category == "Datacenter": + scenarios = [ "Offline", "Server" ] + + content += f"{pre_space}=== \"{category.lower()}\"\n\n" + + cur_space = pre_space + " " + scenarios_string = ", ".join(scenarios) + + content += f"{cur_space}### {category} category \n\n{cur_space} In the {category.lower()} category, {model} has {scenarios_string} scenarios and all the scenarios are mandatory for a closed division submission.\n\n" + + + for framework in frameworks: + cur_space1 = cur_space + " " + content += f"{cur_space}=== \"{framework}\"\n" + content += f"{cur_space1}#### {framework} framework\n\n" + + for device in devices: + if framework.lower() == "deepsparse": + if device.lower() != "cpu": + continue + cur_space2 = cur_space1 + " " + cur_space3 = cur_space2 + " " + cur_space4 = cur_space3 + " " + + content += f"{cur_space1}=== \"{device}\"\n" + content += f"{cur_space2}##### {device} device\n\n" + + # to select the execution environments(currently Docker and Native) + for execution_env in execution_envs: + if (device == "ROCm" or implementation == "qualcomm") and execution_env == "Docker": + continue # docker not currently supported for Qualcomm implementation and ROCm device + if implementation == "nvidia" and execution_env == "Native": + continue # Nvidia implementation only supports execution through docker + content += f"{cur_space2}=== \"{execution_env}\"\n" + content += f"{cur_space3}###### {execution_env} Environment\n\n" + test_query_count=get_test_query_count(model, implementation, device) + + if "99.9" not in model: #not showing docker command as it is already done for the 99% variant + if execution_env == "Native": # Native implementation steps through virtual environment + content += f"{cur_space3}####### Setup a virtual environment for Python\n" + content += get_venv_command(spaces+16) + content += f"{cur_space3}####### Performance Estimation for Offline Scenario\n" + content += mlperf_inference_run_command(spaces+17, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True).replace("--docker ","") + content += f"{cur_space3}The above command should do a test run of Offline scenario and record the estimated offline_target_qps.\n\n" + + else: # Docker implementation steps + content += f"{cur_space3}####### Docker Container Build and Performance Estimation for Offline Scenario\n" + docker_info = get_docker_info(spaces+16, 
model, implementation, device) + content += docker_info + content += mlperf_inference_run_command(spaces+17, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True) + content += f"{cur_space3}The above command should get you to an interactive shell inside the docker container and do a quick test run for the Offline scenario. Once inside the docker container please do the below commands to do the accuracy + performance runs for each scenario.\n\n" + content += f"{cur_space3}
\n" + content += f"{cur_space3} Please click here to see more options for the docker launch \n\n" + content += f"{cur_space3}* `--docker_cm_repo `: to use a custom fork of cm4mlops repository inside the docker image\n\n" + content += f"{cur_space3}* `--docker_cache=no`: to not use docker cache during the image build\n" + + if device.lower() not in [ "cuda" ]: + content += f"{cur_space3}* `--docker_os=ubuntu`: ubuntu and rhel are supported. \n" + content += f"{cur_space3}* `--docker_os_version=20.04`: [20.04, 22.04] are supported for Ubuntu and [8, 9] for RHEL\n" + + content += f"{cur_space3}
\n" + else: + content += f"{cur_space3} You can reuse the same environment as described for {model.split('.')[0]}.\n" + content += f"{cur_space3}###### Performance Estimation for Offline Scenario\n" + content += mlperf_inference_run_command(spaces+17, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True).replace("--docker ","") + content += f"{cur_space3}The above command should do a test run of Offline scenario and record the estimated offline_target_qps.\n\n" + + + run_suffix = "" + run_suffix += f"{cur_space3}
\n" + run_suffix += f"{cur_space3} Please click here to see more options for the RUN command\n\n" + run_suffix += f"{cur_space3}* Use `--division=closed` to do a closed division submission which includes compliance runs\n\n" + run_suffix += f"{cur_space3}* Use `--rerun` to do a rerun even when a valid run exists\n" + run_suffix += f"{cur_space3}
\n" + + for scenario in scenarios: + content += f"{cur_space3}=== \"{scenario}\"\n{cur_space4}###### {scenario}\n\n" + run_cmd = mlperf_inference_run_command(spaces+21, model, implementation, framework.lower(), category.lower(), scenario, device.lower(), "valid", scenarios) + content += run_cmd + #content += run_suffix + + content += f"{cur_space3}=== \"All Scenarios\"\n{cur_space4}###### All Scenarios\n\n" + run_cmd = mlperf_inference_run_command(spaces+21, model, implementation, framework.lower(), category.lower(), "All Scenarios", device.lower(), "valid", scenarios) + content += run_cmd + content += run_suffix + + + + readme_prefix = get_readme_prefix(spaces, model, implementation) + + readme_suffix = get_readme_suffix(spaces, model, implementation) + + return readme_prefix + content + readme_suffix + + def get_test_query_count(model, implementation, device, num_devices=1): + + if model == "resnet50": + p_range = 1000 + elif model in [ "retinanet", "bert-99", "bert-99.9" ]: + p_range = 100 + else: + p_range = 50 + + if device == "cuda": + p_range *= 40 + p_range *= num_devices + + return p_range + + def get_readme_prefix(spaces, model, implementation): + readme_prefix = "" + pre_space=" " + #for i in range(1,spaces): + # pre_space = pre_space + " " + #pre_space += " " + + return readme_prefix + + def get_venv_command(spaces): + pre_space = " "*spaces + return f"""\n +{pre_space}```bash +{pre_space}cm run script --tags=install,python-venv --name=mlperf +{pre_space}export CM_SCRIPT_EXTRA_CMD=\"--adr.python.name=mlperf\" +{pre_space}```\n""" + + def get_docker_info(spaces, model, implementation, device): + info = "" + pre_space="" + for i in range(1,spaces): + pre_space = pre_space + " " + pre_space += " " + #pre_space = " " + if implementation == "nvidia": + info += f"\n{pre_space}!!! tip\n\n" + info+= f"{pre_space} All the Nvidia benchmarks, except GPT-J and LLAMA2-70B, use the same Docker container. Therefore, if you have already executed the Docker setup command for any benchmark, you can skip the Docker setup command below and run the commands inside the existing Docker container. The Docker container for GPT-J and LLAMA2-70B is the same and can be used for the other benchmarks, but not vice versa. This is because TensorRT-LLM is built specifically for the LLM benchmarks. If you are already inside a Docker container, execute the below Docker setup command without the --docker option for performance estimation.\n\n" + return info + + def get_readme_suffix(spaces, model, implementation): + readme_suffix = "" + pre_space="" + for i in range(1,spaces): + pre_space = pre_space + " " + pre_space += " " + + if implementation == "reference": + if not model.endswith("-99"): + model_base_name = model.replace("-99.9","").replace("-99","") + readme_suffix+= f"{pre_space}* If you want to download the official MLPerf model and dataset for {model} you can follow [this README](get-{model_base_name}-data.md).\n" + if model == "resnet50": + readme_suffix += f"{pre_space}* Please see [mobilenets.md](mobilenets.md) for running mobilenet models for Image Classification." + return readme_suffix + + def get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scenarios = []): + extra_content = "" + f_pre_space += "" + if scenario == "Server" or (scenario == "All Scenarios" and "Server" in scenarios): + extra_content += f"{f_pre_space} * `` must be determined manually. It is usually around 80% of the Offline QPS, but on some systems, it can drop below 50%. 
If a higher value is specified, the latency constraint will not be met, and the run will be considered invalid.\n" + + if "gptj" in model and device == "cuda" and implementation == "reference": + extra_content += f"{f_pre_space} * `--precision=[float16|bfloat16]` can help run on GPUs with less RAM \n" + extra_content += f"{f_pre_space} * `--beam-size=1` Beam size of 4 is mandatory for a closed division submission but reducing the beam size can help in running the model on GPUs with lower device memory\n" + if extra_content: + extra_content = f"{f_pre_space}!!! tip\n\n" + extra_content + + return extra_content + + @env.macro + def mlperf_inference_run_command(spaces, model, implementation, framework, category, scenario, device="cpu", execution_mode="test", test_query_count="20", docker=False, scenarios = []): + pre_space = "" + for i in range(1,spaces): + pre_space = pre_space + " " + f_pre_space = pre_space + pre_space += " " + + if scenario == "All Scenarios": + scenario_variation_tag = ",_all-scenarios" + scenario_option = "" + else: + scenario_variation_tag = "" + scenario_option = f"\\\n{pre_space} --scenario={scenario}" + + if scenario == "Server" or (scenario == "All Scenarios" and "Server" in scenarios): + scenario_option = f"\\\n{pre_space} --server_target_qps=" + + run_cmd_extra = get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scenarios) + + if docker: + docker_cmd_suffix = f" \\\n{pre_space} --docker --quiet" + docker_cmd_suffix += f" \\\n{pre_space} --test_query_count={test_query_count}" + + docker_setup_cmd = f"""\n +{f_pre_space}```bash +{f_pre_space}cm run script --tags=run-mlperf,inference,_find-performance,_full{scenario_variation_tag} \\ +{pre_space} --model={model} \\ +{pre_space} --implementation={implementation} \\ +{pre_space} --framework={framework} \\ +{pre_space} --category={category} {scenario_option} \\ +{pre_space} --execution_mode=test \\ +{pre_space} --device={device} {docker_cmd_suffix} +{f_pre_space}```\n""" + + return docker_setup_cmd + run_cmd_extra + + else: + cmd_suffix = f"\\\n{pre_space} --quiet" + + if execution_mode == "test": + cmd_suffix += f" \\\n {pre_space} --test_query_count={test_query_count}" + + run_cmd = f"""\n +{f_pre_space}```bash +{f_pre_space}cm run script --tags=run-mlperf,inference{scenario_variation_tag} \\ +{pre_space} --model={model} \\ +{pre_space} --implementation={implementation} \\ +{pre_space} --framework={framework} \\ +{pre_space} --category={category} {scenario_option} \\ +{pre_space} --execution_mode={execution_mode} \\ +{pre_space} --device={device} {cmd_suffix} +{f_pre_space}```\n""" + + return run_cmd + run_cmd_extra diff --git a/mkdocs.yml b/mkdocs.yml index 0d0f64152..339a0657d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,11 +19,8 @@ theme: - navigation.top - toc.follow nav: - - Inference: - - index.md - Install: - install/index.md - - Quick Start: install/quick-start.md - Benchmarks: - benchmarks/index.md - Image Classification:
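
A quick sanity check of the reworked `mlperf_inference_run_command` macro: tracing the non-Docker branch with `execution_mode="valid"`, the snippet it renders for, say, the reference ResNet50 implementation (onnxruntime framework, edge category, Offline scenario, CPU device) comes out roughly as below, with the leading indentation added by the `spaces` argument trimmed for readability:

```bash
cm run script --tags=run-mlperf,inference \
   --model=resnet50 \
   --implementation=reference \
   --framework=onnxruntime \
   --category=edge \
   --scenario=Offline \
   --execution_mode=valid \
   --device=cpu \
   --quiet
```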