Decomposition of aten.pixel_shuffle with static input shape #2550

Merged: 7 commits, Nov 8, 2023. Changes shown from 2 commits.
24 changes: 24 additions & 0 deletions include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@@ -6419,6 +6419,30 @@ def Torch_AtenPermuteOp : Torch_Op<"aten.permute", [
}];
}

def Torch_AtenPixelShuffleOp : Torch_Op<"aten.pixel_shuffle", [
AllowsTypeRefinement,
HasValueSemantics,
ReadOnly
]> {
let summary = "Generated op for `aten::pixel_shuffle : (Tensor, int) -> (Tensor)`";
let arguments = (ins
AnyTorchTensorType:$self,
Torch_IntType:$upscale_factor
);
let results = (outs
AnyTorchTensorType:$result
);
let hasCustomAssemblyFormat = 1;
let extraClassDefinition = [{
ParseResult AtenPixelShuffleOp::parse(OpAsmParser &parser, OperationState &result) {
return parseDefaultTorchOp(parser, result, 2, 1);
}
void AtenPixelShuffleOp::print(OpAsmPrinter &printer) {
printDefaultTorchOp(printer, *this, 2, 1);
}
}];
}

def Torch_AtenMovedimIntOp : Torch_Op<"aten.movedim.int", [
AllowsTypeRefinement,
ReadOnly
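For reference, the generated op mirrors the PyTorch signature `aten::pixel_shuffle : (Tensor, int) -> (Tensor)`. A minimal PyTorch sketch of the op's shape semantics (the concrete sizes are illustrative only):

import torch
import torch.nn.functional as F

# Input is (N, C*r*r, H, W); pixel_shuffle rearranges channels into space.
x = torch.randn(1, 4 * 3 * 3, 2, 5)      # C = 4, r = 3
y = F.pixel_shuffle(x, upscale_factor=3)
print(y.shape)                           # torch.Size([1, 4, 6, 15]) == (N, C, H*r, W*r)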
234 changes: 226 additions & 8 deletions lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp
@@ -9,6 +9,7 @@

#include "PassDetail.h"

#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -93,7 +94,7 @@ static Value createSumAlongDimension(PatternRewriter &rewriter, Location loc,
keepDimCst, dtype);
}

- // Redunction function to calculate max along given `dim`.
+ // Reduction function to calculate max along given `dim`.
static Value createMaxAlongDimension(PatternRewriter &rewriter, Location loc,
Operation *op, Value input, Value dim,
bool keepDim) {
@@ -211,6 +212,7 @@ class DecomposeAtenAmaxOp : public OpRewritePattern<AtenAmaxOp> {
Location loc = op.getLoc();
SmallVector<int64_t, 4> dims;
if (!matchPattern(op.getDim(), m_TorchListOfConstantInts(dims)))

return rewriter.notifyMatchFailure(op,
"non-const dim parameter unsupported");

@@ -227,8 +229,7 @@ class DecomposeAtenAmaxOp : public OpRewritePattern<AtenAmaxOp> {
}
// For every dimension included in `dim` of the op, iterated over in
// reverse order, we create a call to aten.max.dim.
- std::sort(dims.begin(), dims.end());
- std::reverse(dims.begin(), dims.end());
+ std::sort(dims.rbegin(), dims.rend());
for (int64_t dimInt : dims) {
int64_t inputRank = inputTy.getSizes().size();
dimInt = toPositiveDim(dimInt, inputRank);
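As a sanity check on this reduction order, a small PyTorch sketch (dims and shapes chosen arbitrarily) showing that repeated max along each dim, visited in descending order with keepdim, reproduces amax:

import torch

x = torch.randn(2, 3, 4)
dims = [0, 2]
out = x
# Descending order mirrors std::sort(dims.rbegin(), dims.rend()) above.
for d in sorted(dims, reverse=True):
    out = out.max(dim=d, keepdim=True).values
assert torch.equal(out, x.amax(dim=dims, keepdim=True))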
@@ -255,6 +256,7 @@ class DecomposeAtenSizeOp : public OpRewritePattern<AtenSizeOp> {
Location loc = op.getLoc();
Value self = op.getSelf();
MLIRContext *context = op.getContext();

std::optional<unsigned> maybeRank = getTensorRank(self);
if (!maybeRank)
return rewriter.notifyMatchFailure(op, "Unimplemented: unranked tensor");
@@ -386,9 +388,10 @@ class DecomposeAtenGluOp : public OpRewritePattern<AtenGluOp> {

Value remainder = rewriter.create<AtenRemainderIntOp>(loc, dimSize, two);
Value eqOrNot = rewriter.create<AtenEqIntOp>(loc, remainder, zero);

rewriter.create<RuntimeAssertOp>(
loc, eqOrNot,
rewriter.getStringAttr("AtenGluOp's dim size must be multiply of 2"));
rewriter.getStringAttr("AtenGluOp's dim size must be multiple of 2"));

Value splitLength = rewriter.create<AtenFloordivIntOp>(loc, dimSize, two);
Value a = rewriter.create<AtenNarrowOp>(loc, outputTy, self, dim, zero,
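The rest of this pattern is below the fold, but the decomposition it implements follows glu's definition: gate the first half of the dim with the sigmoid of the second half. A minimal PyTorch sketch of that identity, with arbitrary sizes:

import torch
import torch.nn.functional as F

x = torch.randn(2, 6)
half = x.size(1) // 2            # splitLength above
a = x.narrow(1, 0, half)         # first half
b = x.narrow(1, half, half)      # second half
assert torch.allclose(F.glu(x, dim=1), a * torch.sigmoid(b))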
@@ -443,6 +446,7 @@ class DecomposeAtenEyeMOp : public OpRewritePattern<AtenEyeMOp> {
PatternRewriter &rewriter) const override {
Location loc = op.getLoc();
int64_t n;

if (!matchPattern(op.getN(), m_TorchConstantInt(&n)))
return rewriter.notifyMatchFailure(op,
"unimplemented: n must be constant");
@@ -1092,9 +1096,223 @@ class DecomposeAtenMvOp : public OpRewritePattern<AtenMvOp> {
};
} // namespace

// Decompose aten.pixel_shuffle into aten.permute and aten.reshape operations.
//
// If input is a tensor of shape (*leading_dims, C*r*r, H, W), where
// leading_dims is of size N, then
// X = pixel_shuffle(input, upscale_factor)
//
// gets replaced with
// A = input.reshape(*leading_dims, C, r, r, H, W)
// B = A.permute(0, ..., N, N+3, N+1, N+4, N+2)
// X = B.reshape(*leading_dims, C, r*H, r*W)
//
// 'r' above is referred to as the 'upscale factor' or just 'factor' below.
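A minimal PyTorch sketch of this equivalence, with one leading (batch) dimension, so N = 1, and arbitrary sizes:

import torch
import torch.nn.functional as F

C, r, H, W = 3, 2, 4, 5
x = torch.randn(2, C * r * r, H, W)

a = x.reshape(2, C, r, r, H, W)      # A: expand the channel dim
b = a.permute(0, 1, 4, 2, 5, 3)      # B: (0, ..., N, N+3, N+1, N+4, N+2) with N = 1
out = b.reshape(2, C, H * r, W * r)  # X: collapse to the output shape

assert torch.equal(out, F.pixel_shuffle(x, r))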
namespace {
class DecomposeAtenPixelShuffleOp
: public OpRewritePattern<AtenPixelShuffleOp> {
public:
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(AtenPixelShuffleOp op,
PatternRewriter &rewriter) const override {


Location loc = op.getLoc();
Value inValue = op.getSelf();
auto inType = inValue.getType().cast<BaseTensorType>();
auto maybeSizes = inType.getOptionalSizes();
if (!maybeSizes) {
return rewriter.notifyMatchFailure(
op, "Expected input tensor to have known rank.");
}
auto inShape = maybeSizes.value();
auto inRank = inShape.size();

// TODO: Handle dynamic shapes.
//
// Currently the decomposition of inputs with dynamic shapes results in a
// failure when lowering to linalg. Specifically, the error:
//
// "Unhandled case of expand/collapse. "
//
// is hit in the lowering of aten.view to the linalg dialect (see
// ConvertAtenViewOp in TorchToLinalg/DataMovement.cpp where this error is
// thrown).
//
// The issue is that there is no way for the lowering from torch to linalg
// to know that the view ops (reshape ops) created in this decomposition can
// be mapped directly to tensor.expand_shape and tensor_collapse_shape. We
// DO know that this is the case: the first reshape is an expand_shape
// and the second one is a collapse_shape. But we only know this because of
// the semantics of pixel_shuffle, by the time the lowering sees the view
// ops, this special context is lost.
//
// There are a few possible options for fixing the current approach, as I see
// it:
//
// 1) Lower pixel_shuffle to linalg directly. This would mean a bit of
// duplication of the logic for lowering from aten.permute to linalg, and
// would not be the most 'gradual' lowering approach. But perhaps the most
// straightforward and understandable.
//
// 2) Rather than use aten.reshape ops in the decomposition below, insert
// tensor.expand_shape and tensor.collapse_shape ops directly. This would
// require inserting unrealized_conversion_cast ops to convert the types of
// the operands from !torch.tensor to the MLIR built-in tensor type,
// which isn't ideal (unrealized_conversion_cast should only be used in
// conversion passes AFAIK (?)). UPDATE: not a viable option as we cannot
// make assumptions about the dialects that torch is lowered to.
//
// 3) Create 2 new ops in the torch dialect, with the same semantics as
// tensor.expand_shape and tensor.collapse_shape, but with !torch.tensor
// operands and results. Then use these ops in the decomposition below,
// instead of aten.reshape ops.
//
// For now, we just fail the decomposition here so that a sensible error is
// provided:
for (auto dimSize : inShape) {
if (dimSize == kUnknownSize) {
return rewriter.notifyMatchFailure(
op, "Currently we only decompose pixel_shuffle if the input tensor "
"is statically shaped");
}
}
// The input tensor must have at least 3 dimensions: (1) the channel
// dimension which gets smaller by 'factor*factor', (2) the H channel which
// gets larger by 'factor', and (3) the W channel which gets larger by
// 'factor'. The total number of dimensions is 3 + N, where N is the number
// of leading dimensions, and N >= 0 so the input must have rank at least 3.
if (inRank < 3)
return rewriter.notifyMatchFailure(
op, "Expected input tensor to have rank greater than 2.");

auto nLeadingDims = inRank - 3;

// Get the size of the dimension 'i'. Note the use of 'createOrFold' instead
// of 'create': if the dimension size is known, then the AtenSizeIntOp is
// folded to a ConstantOp.
auto getDimSize = [&](uint64_t i) -> Value {
Value dim =
rewriter.create<ConstantIntOp>(loc, rewriter.getI64IntegerAttr(i));
return rewriter.createOrFold<AtenSizeIntOp>(loc, inValue, dim);
};

auto inC = getDimSize(inRank - 3);
auto inH = getDimSize(inRank - 2);
auto inW = getDimSize(inRank - 1);

auto factor = op.getUpscaleFactor();


Value factorSquared =
rewriter.createOrFold<AtenMulIntOp>(loc, factor, factor);
Value outC =
rewriter.createOrFold<AtenFloordivIntOp>(loc, inC, factorSquared);

Value outH = rewriter.createOrFold<AtenMulIntOp>(loc, inH, factor);
Value outW = rewriter.createOrFold<AtenMulIntOp>(loc, inW, factor);

// Shape of 'A' in the comment at the top
SmallVector<Value> prePermuteShape;
prePermuteShape.reserve(nLeadingDims + 5);

// Shape of 'B' in the comment at the top.
SmallVector<Value> postPermuteShape;
postPermuteShape.reserve(nLeadingDims + 5);

SmallVector<Value> outShape;
outShape.reserve(nLeadingDims + 3);

SmallVector<Value> permutation;
permutation.reserve(nLeadingDims + 5);

for (unsigned i = 0; i < nLeadingDims; ++i) {
auto dimensionAttr = rewriter.getI64IntegerAttr(i);
Value dimensionValue = rewriter.create<ConstantIntOp>(loc, dimensionAttr);
Value leadingDimSize =
rewriter.createOrFold<AtenSizeIntOp>(loc, inValue, dimensionValue);
prePermuteShape.push_back(leadingDimSize);
postPermuteShape.push_back(leadingDimSize);
outShape.push_back(leadingDimSize);
permutation.push_back(dimensionValue);

}

const auto inOptionalDType = inType.getOptionalDtype();

auto getTypeFromShape = [inOptionalDType](auto &&vals) {
// Get a vector of integers from a vector of Values.
auto getIntShape = [](auto &&vals) {
SmallVector<int64_t> shape;
shape.reserve(vals.size());
for (auto v : vals) {
int64_t cst_val;
if (matchPattern(v, m_TorchConstantInt(&cst_val))) {
shape.push_back(cst_val);
} else {
shape.push_back(kUnknownSize);
}
}
return shape;
};

const auto intShape = getIntShape(vals);
return ValueTensorType::get(vals[0].getContext(),
llvm::ArrayRef(intShape), inOptionalDType);
};

prePermuteShape.insert(prePermuteShape.end(),
{outC, factor, factor, inH, inW});

postPermuteShape.insert(postPermuteShape.end(),
{outC, inH, factor, inW, factor});

outShape.insert(outShape.end(), {outC, outH, outW});

SmallVector<uint64_t> permutationTail{0, 3, 1, 4, 2};
for (uint64_t d : permutationTail) {
permutation.push_back(rewriter.create<ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(nLeadingDims + d)));
}
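A quick plain-Python check (names illustrative) that this tail reorders the five trailing dims (C, r1, r2, H, W) into (C, H, r1, W, r2), matching postPermuteShape:

dims = ["C", "r1", "r2", "H", "W"]
tail = [0, 3, 1, 4, 2]
print([dims[i] for i in tail])   # ['C', 'H', 'r1', 'W', 'r2']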

auto listType = Torch::ListType::get(Torch::IntType::get(op.getContext()));


Value shapeA =
rewriter.create<PrimListConstructOp>(loc, listType, prePermuteShape);

Value A = rewriter.create<AtenReshapeOp>(
loc, getTypeFromShape(prePermuteShape), inValue, shapeA);

Value permuteDimsOrder = rewriter.create<PrimListConstructOp>(
loc, Torch::ListType::get(Torch::IntType::get(op->getContext())),
permutation);

Value B = rewriter.create<AtenPermuteOp>(
loc, getTypeFromShape(postPermuteShape), A, permuteDimsOrder);

Value outShapeList =
rewriter.create<PrimListConstructOp>(loc, listType, outShape);

// TODO(jn) figure out why the deduced return type (like
// !torch.vtensor<[3,4,6],si64>) cannot be used for the type of the
// replacement of the pixel_shuffle op's output. The pattern seems to
// require the generic !torch.vtensor type.

auto deducedReturnType = getTypeFromShape(outShape);
auto genericReturnType = deducedReturnType.getWithSizesAndDtype({}, {});

rewriter.replaceOpWithNewOp<AtenReshapeOp>(op, genericReturnType, B,
outShapeList);

return success();
}
};
} // namespace

// ReLU6(x) = min(max(0, x), 6) = min(Relu(x), 6)
- static Value getRelu6Results(PatternRewriter &rewriter, Location loc,
-                              Value input) {
+ static Value
+ getRelu6Results(PatternRewriter &rewriter, Location loc, Value input) {
BaseTensorType inputType = input.getType().cast<BaseTensorType>();

Value relu = rewriter.create<AtenReluOp>(loc, inputType, input);
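A one-line PyTorch check of the identity in the comment above (values arbitrary):

import torch

x = torch.linspace(-2.0, 8.0, steps=11)
assert torch.equal(torch.clamp(x, min=0.0, max=6.0),     # min(max(0, x), 6)
                   torch.clamp(torch.relu(x), max=6.0))  # min(relu(x), 6)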
@@ -4717,8 +4935,7 @@ class DecomposePrimsSqueezeOp : public OpRewritePattern<PrimsSqueezeOp> {
return rewriter.notifyMatchFailure(
op, "all dimensions must be constant ints");

- std::sort(dimensions.begin(), dimensions.end());
- std::reverse(dimensions.begin(), dimensions.end());
+ std::sort(dimensions.rbegin(), dimensions.rend());

if (dimensions.size() == 0) {
rewriter.replaceOp(op, input);
@@ -5463,6 +5680,7 @@ class DecomposeComplexOpsPass
addPatternIfTargetOpIsIllegal<DecomposeAtenSelectIntOp>(patterns);
addPatternIfTargetOpIsIllegal<DecomposeAtenMatmulOp>(patterns);
addPatternIfTargetOpIsIllegal<DecomposeAtenMvOp>(patterns);
addPatternIfTargetOpIsIllegal<DecomposeAtenPixelShuffleOp>(patterns);
addPatternIfTargetOpIsIllegal<DecomposeAtenTOp>(patterns);
addPatternIfTargetOpIsIllegal<DecomposeAten_LogSoftmaxBackwardDataOp>(
patterns);
1 change: 1 addition & 0 deletions lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp
@@ -391,6 +391,7 @@ static void markDecomposedOpsAsIllegal(MLIRContext *context,
target.addIllegalOp<AtenNormScalarOptDimOp>();
target.addIllegalOp<AtenSelectIntOp>();
target.addIllegalOp<AtenMvOp>();
target.addIllegalOp<AtenPixelShuffleOp>();
target.addIllegalOp<AtenTOp>();
target.addIllegalOp<Aten_LogSoftmaxBackwardDataOp>();
target.addDynamicallyLegalOp<AtenMatmulOp>([](AtenMatmulOp op) {
2 changes: 1 addition & 1 deletion lib/Dialect/Torch/Utils/Utils.cpp
@@ -206,7 +206,7 @@ bool Torch::isViewLikeOp(Operation *op) {
TensorStaticInfoCastOp, AtenToDtypeLayoutOp, AtenNumpyTOp,
AtenNarrowOp, AtenNarrowTensorOp, AtenToDeviceOp, PrimsSqueezeOp,
AtenMovedimIntOp, PrimsViewOfOp, AtenRealOp, AtenImagOp,
-          AtenViewAsComplexOp, AtenViewAsRealOp>(op);
+          AtenViewAsComplexOp, AtenViewAsRealOp, AtenPixelShuffleOp>(op);
}

Value Torch::getConstantWithGivenDtypeAndValue(PatternRewriter &rewriter,
@@ -481,6 +481,7 @@ def emit_with_mutating_variants(key, **kwargs):
emit("aten::topk : (Tensor, int, int, bool, bool) -> (Tensor, Tensor)")
emit("aten::transpose.int : (Tensor, int, int) -> (Tensor)")
emit("aten::permute : (Tensor, int[]) -> (Tensor)")
emit("aten::pixel_shuffle : (Tensor, int) -> (Tensor)")
emit("aten::movedim.int : (Tensor, int, int) -> (Tensor)")
emit("aten::bmm : (Tensor, Tensor) -> (Tensor)")
emit("aten::cumsum : (Tensor, int, int?) -> (Tensor)")