diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py index 2d9e647c130cab..0e4614714b3fee 100644 --- a/src/transformers/models/deberta/modeling_deberta.py +++ b/src/transformers/models/deberta/modeling_deberta.py @@ -187,6 +187,8 @@ def backward(ctx, grad_output): @staticmethod def symbolic(g: torch._C.Graph, input: torch._C.Value, local_ctx: Union[float, DropoutContext]) -> torch._C.Value: + from torch.onnx import symbolic_opset12 + dropout_p = local_ctx if isinstance(local_ctx, DropoutContext): dropout_p = local_ctx.dropout diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py index 738981648af956..c6e2b25dc0e812 100644 --- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py @@ -193,6 +193,8 @@ def backward(ctx, grad_output): @staticmethod def symbolic(g: torch._C.Graph, input: torch._C.Value, local_ctx: Union[float, DropoutContext]) -> torch._C.Value: + from torch.onnx import symbolic_opset12 + dropout_p = local_ctx if isinstance(local_ctx, DropoutContext): dropout_p = local_ctx.dropout diff --git a/src/transformers/models/sew_d/modeling_sew_d.py b/src/transformers/models/sew_d/modeling_sew_d.py index e582705ab09424..da6fd8d168dc2a 100644 --- a/src/transformers/models/sew_d/modeling_sew_d.py +++ b/src/transformers/models/sew_d/modeling_sew_d.py @@ -597,6 +597,8 @@ def backward(ctx, grad_output): @staticmethod def symbolic(g: torch._C.Graph, input: torch._C.Value, local_ctx: Union[float, DropoutContext]) -> torch._C.Value: + from torch.onnx import symbolic_opset12 + dropout_p = local_ctx if isinstance(local_ctx, DropoutContext): dropout_p = local_ctx.dropout