You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
torchbench_amp_fp16_training
xpu train vision_maskrcnn
Traceback (most recent call last):
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/common.py", line 2294, in validate_model
self.model_iter_fn(model, example_inputs)
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/torchbench.py", line 456, in forward_and_backward_pass
pred = mod(*cloned_inputs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1566, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1575, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/models/detection/generalized_rcnn.py", line 105, in forward
detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1566, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1575, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/models/detection/roi_heads.py", line 761, in forward
box_features = self.box_roi_pool(features, proposals, image_shapes)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1566, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1575, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/ops/poolers.py", line 314, in forward
return _multiscale_roi_align(
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/ops/poolers.py", line 204, in _multiscale_roi_align
result_idx_in_level = roi_align(
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/ops/roi_align.py", line 238, in roi_align
return torch.ops.torchvision.roi_align(
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/ops.py", line 1064, in call
return self._op(*args, **(kwargs or {}))
RuntimeError: Expected tensor for argument #1 'input' to have the same type as tensor for argument #2 'rois'; but type torch.HalfTensor does not equal torch.FloatTensor (while checking arguments for roi_align_forward_kernel)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/common.py", line 4177, in run
) = runner.load_model(
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/torchbench.py", line 380, in load_model
self.validate_model(model, example_inputs)
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/common.py", line 2296, in validate_model
raise RuntimeError("Eager run failed") from e
RuntimeError: Eager run failed
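The issue occurs only in AMP (autocast) mode; it does not happen in the pure BF16/FP16 modes. I suspect the crash is caused by the torchvision ROI align operator having no autocast registration for the XPU backend (AutocastXPU), which would explain why `input` reaches the kernel as half precision while `rois` is still float32, as the error message reports. @fengyuan14 could you provide your insights on this?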
🐛 Describe the bug
torchbench_amp_fp16_training
xpu train vision_maskrcnn
Traceback (most recent call last):
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/common.py", line 2294, in validate_model
self.model_iter_fn(model, example_inputs)
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/torchbench.py", line 456, in forward_and_backward_pass
pred = mod(*cloned_inputs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1566, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1575, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/models/detection/generalized_rcnn.py", line 105, in forward
detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1566, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1575, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/models/detection/roi_heads.py", line 761, in forward
box_features = self.box_roi_pool(features, proposals, image_shapes)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1566, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1575, in _call_impl
return forward_call(*args, **kwargs)
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/ops/poolers.py", line 314, in forward
return _multiscale_roi_align(
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/ops/poolers.py", line 204, in _multiscale_roi_align
result_idx_in_level = roi_align(
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torchvision/ops/roi_align.py", line 238, in roi_align
return torch.ops.torchvision.roi_align(
File "/home/sdp/miniforge3/envs/e2e_ci/lib/python3.10/site-packages/torch/ops.py", line 1064, in call
return self._op(*args, **(kwargs or {}))
RuntimeError: Expected tensor for argument #1 'input' to have the same type as tensor for argument #2 'rois'; but type torch.HalfTensor does not equal torch.FloatTensor (while checking arguments for roi_align_forward_kernel)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/common.py", line 4177, in run
) = runner.load_model(
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/torchbench.py", line 380, in load_model
self.validate_model(model, example_inputs)
File "/home/sdp/actions-runner/_work/torch-xpu-ops/pytorch/benchmarks/dynamo/common.py", line 2296, in validate_model
raise RuntimeError("Eager run failed") from e
RuntimeError: Eager run failed
eager_fail_to_run
Versions
torch-xpu-ops: 31c4001
pytorch: 0f81473d7b4a1bf09246410712df22541be7caf3 + PRs: 127277,129120
device: PVC 1100, 803.61, 0.5.1
The text was updated successfully, but these errors were encountered: