Use aimet FP16 quantization flow instead of TF flow for GPU
Signed-off-by: yathindra kota <quic_ykota@quicinc.com>
quic-ykota authored and quic-bharathr committed Jul 11, 2023
1 parent 432fe22 commit 8dd5e24
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions TrainingExtensions/tensorflow/src/AimetFp16OpUtils.h
@@ -40,6 +40,7 @@
 #define AIMET_FP16_OP_UTILS_H
 
 #include "AimetOpUtils.h"
+#include "DlQuantization/Fp16Quantization.hpp"
 
 #define EIGEN_USE_THREADS
 
@@ -86,18 +87,12 @@ class QuantizeDequantizeFp16Functor <CPUDevice>
 template <>
 class QuantizeDequantizeFp16Functor <GPUDevice>
 {
-    // truncate, if set to true would truncate the inputs before casting to fp16. If set to true, tensorflow backend
-    // calls LSBZeroSetter which does the truncate operation
-    bool _truncate = false;
-
 public:
     void operator()(OpKernelContext* context, const Tensor& inTensor, Tensor* outTensor)
     {
-        Tensor tempTensorFp16;
-        OP_REQUIRES_OK(context, context->allocate_temp(DT_HALF, inTensor.shape(), &tempTensorFp16));
-
-        GetGpuCastFromFloat(DT_HALF)(context, inTensor, &tempTensorFp16, _truncate);
-        GetGpuCastFromHalf(DT_FLOAT)(context, tempTensorFp16, outTensor, _truncate);
+        DlQuantization::quantizeDequantizeFp16Gpu(inTensor.flat<float>().data(),
+                                                  inTensor.NumElements(),
+                                                  outTensor->flat<float>().data());
     }
 };
 #endif // GOOGLE_CUDA
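For context: the old TF flow allocated an intermediate DT_HALF tensor and ran two separate cast ops (float to half, then half back to float); the new flow is a single fused call into aimet's DlQuantization library. Below is a minimal sketch of the equivalent round-trip, assuming quantizeDequantizeFp16Gpu (declared in DlQuantization/Fp16Quantization.hpp) simply rounds each element through IEEE fp16 on the device; the kernel name and launch configuration here are hypothetical, not the library's actual implementation.

    #include <cuda_fp16.h>
    #include <cstdint>

    // Hypothetical kernel illustrating an fp16 quantize-dequantize round-trip:
    // each fp32 value is rounded to the nearest representable fp16 value,
    // then widened back to fp32.
    __global__ void quantizeDequantizeFp16Kernel(const float* in, int64_t count, float* out)
    {
        int64_t idx = blockIdx.x * (int64_t) blockDim.x + threadIdx.x;
        if (idx < count)
        {
            out[idx] = __half2float(__float2half(in[idx]));
        }
    }

Compared with the removed flow, the fused call avoids allocating the temporary DT_HALF tensor and presumably collapses the two cast launches into one pass over the data.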
