llvm · qedawkins · Nov 8, 2023 · Nov 7, 2023 · Nov 7, 2023 · Nov 7, 2023
diff --git a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@@ -6419,6 +6419,30 @@ def Torch_AtenPermuteOp : Torch_Op<"aten.permute", [
   }];
 }
 
+def Torch_AtenPixelShuffleOp : Torch_Op<"aten.pixel_shuffle", [
+    AllowsTypeRefinement,
+    HasValueSemantics,
+    ReadOnly
+  ]> {
+  let summary = "Generated op for `aten::pixel_shuffle : (Tensor, int) -> (Tensor)`";
+  let arguments = (ins // Two operands: the input tensor and the integer factor.
+    AnyTorchTensorType:$self,
+    Torch_IntType:$upscale_factor
+  );
+  let results = (outs // One tensor result.
+    AnyTorchTensorType:$result
+  );
+  let hasCustomAssemblyFormat = 1;
+  let extraClassDefinition = [{
+    ParseResult AtenPixelShuffleOp::parse(OpAsmParser &parser, OperationState &result) {
+      return parseDefaultTorchOp(parser, result, 2, 1); // NOTE(review): presumably operand count, result count - confirm
+    }
+    void AtenPixelShuffleOp::print(OpAsmPrinter &printer) {
+      printDefaultTorchOp(printer, *this, 2, 1); // NOTE(review): presumably the same counts as in parse() - confirm
+    }
+  }];
+}
+
 def Torch_AtenMovedimIntOp : Torch_Op<"aten.movedim.int", [
     AllowsTypeRefinement,
     ReadOnly

diff --git a/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp b/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp
@@ -6749,6 +6749,50 @@ StringRef mlir::torch::Torch::getAbstractInterpLibrary() {
 "    %3 = call @__torch__.torch.jit._shape_functions.sum_mean_dim(%arg0, %1, %arg2, %2) : (!torch.list<int>, !torch.optional<list<int>>, !torch.bool, !torch.any) -> !torch.list<int>\n"
 "    return %3 : !torch.list<int>\n"
 "  }\n"
+"  func.func @\"__torch_mlir_shape_fn.aten.pixel_shuffle\"(%arg0: !torch.list<int>, %arg1: !torch.int) -> !torch.list<int> {\n"
+"    %int-1 = torch.constant.int -1\n"
+"    %int-2 = torch.constant.int -2\n"
+"    %int1 = torch.constant.int 1\n"
+"    %str = torch.constant.str \"AssertionError: number of input channels  must be divisible by upscale_factor^2 in pixel_shuffle\"\n"
+"    %int-3 = torch.constant.int -3\n"
+"    %none = torch.constant.none\n"
+"    %str_0 = torch.constant.str \"AssertionError: input must be at least rank-3 in pixel_shuffle\"\n"
+"    %int3 = torch.constant.int 3\n"
+"    %int0 = torch.constant.int 0\n"
+"    %0 = torch.aten.len.t %arg0 : !torch.list<int> -> !torch.int\n"
+"    %1 = torch.aten.ge.int %0, %int3 : !torch.int, !torch.int -> !torch.bool\n"
+"    torch.prim.If %1 -> () {\n"
+"      torch.prim.If.yield\n"
+"    } else {\n"
+"      torch.prim.RaiseException %str_0, %none : !torch.str, !torch.none\n"
+"      torch.prim.If.yield\n"
+"    }\n"
+"    %2 = torch.aten.mul.int %arg1, %arg1 : !torch.int, !torch.int -> !torch.int\n"
+"    %3 = torch.aten.__getitem__.t %arg0, %int-3 : !torch.list<int>, !torch.int -> !torch.int\n"
+"    %4 = torch.aten.remainder.int %3, %2 : !torch.int, !torch.int -> !torch.int\n"
+"    %5 = torch.aten.eq.int %4, %int0 : !torch.int, !torch.int -> !torch.bool\n"
+"    torch.prim.If %5 -> () {\n"
+"      torch.prim.If.yield\n"
+"    } else {\n"
+"      torch.prim.RaiseException %str, %none : !torch.str, !torch.none\n"
+"      torch.prim.If.yield\n"
+"    }\n"
+"    %6 = torch.aten.slice.t %arg0, %int0, %int-3, %int1 : !torch.list<int>, !torch.int, !torch.int, !torch.int -> !torch.list<int>\n"
+"    %7 = torch.aten.__getitem__.t %arg0, %int-3 : !torch.list<int>, !torch.int -> !torch.int\n"
+"    %8 = torch.aten.floordiv.int %7, %2 : !torch.int, !torch.int -> !torch.int\n"
+"    %9 = torch.aten.append.t %6, %8 : !torch.list<int>, !torch.int -> !torch.list<int>\n"
+"    %10 = torch.aten.__getitem__.t %arg0, %int-2 : !torch.list<int>, !torch.int -> !torch.int\n"
+"    %11 = torch.aten.mul.int %10, %arg1 : !torch.int, !torch.int -> !torch.int\n"
+"    %12 = torch.aten.append.t %6, %11 : !torch.list<int>, !torch.int -> !torch.list<int>\n"
+"    %13 = torch.aten.__getitem__.t %arg0, %int-1 : !torch.list<int>, !torch.int -> !torch.int\n"
+"    %14 = torch.aten.mul.int %13, %arg1 : !torch.int, !torch.int -> !torch.int\n"
+"    %15 = torch.aten.append.t %6, %14 : !torch.list<int>, !torch.int -> !torch.list<int>\n"
+"    return %6 : !torch.list<int>\n"
+"  }\n"
+"  func.func @\"__torch_mlir_dtype_fn.aten.pixel_shuffle\"(%arg0: !torch.tuple<int, int>, %arg1: !torch.int) -> !torch.int {\n"
+"    %0:2 = torch.prim.TupleUnpack %arg0 : !torch.tuple<int, int> -> !torch.int, !torch.int\n"
+"    return %0#1 : !torch.int\n"
+"  }\n"
 "  func.func @\"__torch_mlir_shape_fn.aten.permute\"(%arg0: !torch.list<int>, %arg1: !torch.list<int>) -> !torch.list<int> {\n"
 "    %0 = call @__torch__.torch.jit._shape_functions.permute(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
 "    return %0 : !torch.list<int>\n"

diff --git a/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp b/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp
@@ -93,7 +93,7 @@ static Value createSumAlongDimension(PatternRewriter &rewriter, Location loc,
                                               keepDimCst, dtype);
 }
 
-// Redunction function to calculate max along given `dim`.
+// Reduction function to calculate max along given `dim`.
 static Value createMaxAlongDimension(PatternRewriter &rewriter, Location loc,
                                      Operation *op, Value input, Value dim,
                                      bool keepDim) {
@@ -211,6 +211,7 @@ class DecomposeAtenAmaxOp : public OpRewritePattern<AtenAmaxOp> {
     Location loc = op.getLoc();
     SmallVector<int64_t, 4> dims;
     if (!matchPattern(op.getDim(), m_TorchListOfConstantInts(dims)))
+
       return rewriter.notifyMatchFailure(op,
                                          "non-const dim parameter unsupported");
 
@@ -227,8 +228,7 @@ class DecomposeAtenAmaxOp : public OpRewritePattern<AtenAmaxOp> {
     }
     // For every dimension included in `dim` of the op, iterated over in
     // reverse order, we create a call to aten.max.dim.
-    std::sort(dims.begin(), dims.end());
-    std::reverse(dims.begin(), dims.end());
+    std::sort(dims.rbegin(), dims.rend());
     for (int64_t dimInt : dims) {
       int64_t inputRank = inputTy.getSizes().size();
       dimInt = toPositiveDim(dimInt, inputRank);
@@ -255,6 +255,7 @@ class DecomposeAtenSizeOp : public OpRewritePattern<AtenSizeOp> {
     Location loc = op.getLoc();
     Value self = op.getSelf();
     MLIRContext *context = op.getContext();
+
     std::optional<unsigned> maybeRank = getTensorRank(self);
     if (!maybeRank)
       return rewriter.notifyMatchFailure(op, "Unimplemented: unranked tensor");
@@ -386,9 +387,10 @@ class DecomposeAtenGluOp : public OpRewritePattern<AtenGluOp> {
 
     Value remainder = rewriter.create<AtenRemainderIntOp>(loc, dimSize, two);
     Value eqOrNot = rewriter.create<AtenEqIntOp>(loc, remainder, zero);
+
     rewriter.create<RuntimeAssertOp>(
         loc, eqOrNot,
-        rewriter.getStringAttr("AtenGluOp's dim size must be multiply of 2"));
+        rewriter.getStringAttr("AtenGluOp's dim size must be multiple of 2"));
 
     Value splitLength = rewriter.create<AtenFloordivIntOp>(loc, dimSize, two);
     Value a = rewriter.create<AtenNarrowOp>(loc, outputTy, self, dim, zero,
@@ -443,6 +445,7 @@ class DecomposeAtenEyeMOp : public OpRewritePattern<AtenEyeMOp> {
                                 PatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     int64_t n;
+
     if (!matchPattern(op.getN(), m_TorchConstantInt(&n)))
       return rewriter.notifyMatchFailure(op,
                                          "unimplemented: n must be constant");
@@ -1092,9 +1095,181 @@ class DecomposeAtenMvOp : public OpRewritePattern<AtenMvOp> {
 };
 } // namespace
 
+// Decompose aten.pixel_shuffle into aten.reshape and aten.permute operations.
+//
+// If input is a tensor of shape (*leading_dims, C*r*r, H, W), where
+// leading_dims is of size N, then
+//    X = pixel_shuffle(input, upscale_factor)
+//
+// gets replaced with
+//    A = input.reshape(*leading_dims, C, r, r, H, W)
+//    B = A.permute(0, ..., N-1, N, N+3, N+1, N+4, N+2)
+//    X = B.reshape(*leading_dims, C, r*H, r*W)
+//
+// 'r' above is referred to as the 'upscale factor' or just 'factor' below.
+namespace {
+class DecomposeAtenPixelShuffleOp
+    : public OpRewritePattern<AtenPixelShuffleOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(AtenPixelShuffleOp op,
+                                PatternRewriter &rewriter) const override {
+    // The rewrite indexes dimensions from the back of the shape (C, H and W
+    // are the last three), so the input must at least have a known rank.
+    Location loc = op.getLoc();
+    Value inValue = op.getSelf();
+    auto inType = inValue.getType().cast<BaseTensorType>();
+    auto maybeSizes = inType.getOptionalSizes();
+    if (!maybeSizes) {
+      return rewriter.notifyMatchFailure(
+          op, "Expected input tensor to have known rank.");
+    }
+    auto inShape = maybeSizes.value();
+    auto inRank = inShape.size();
+
+    // TODO: support dynamic shapes, probably by lowering pixel_shuffle to
+    // linalg directly. Pixel shuffle does a reshape that is hard to recover
+    // through pure torch (view) ops, especially in dynamic cases.
+    //
+    // See: https://github.com/llvm/torch-mlir/issues/2559
+    //
+    // For now, we just fail the decomposition here so that a sensible error is
+    // provided:
+    for (auto dimSize : inShape) {
+      if (dimSize == kUnknownSize) {
+        return rewriter.notifyMatchFailure(
+            op, "Currently we only decompose pixel_shuffle if the input tensor "
+                "is statically shaped");
+      }
+    }
+    // The input tensor must have at least 3 dimensions: (1) the channel
+    // dimension which gets smaller by 'factor*factor', (2) the H dimension
+    // which gets larger by 'factor' and (3) the W dimension which gets larger
+    // by 'factor'. The total number of dimensions is 3 + N, where N is the
+    // number of leading dimensions, and N >= 0, so the rank must be >= 3.
+    if (inRank < 3)
+      return rewriter.notifyMatchFailure(
+          op, "Expected input tensor to have rank greater than 2.");
+
+    auto nLeadingDims = inRank - 3;
+
+    // Get the size of the dimension 'i'. Note the use of 'createOrFold' instead
+    // of 'create': if the dimension size is known, then the AtenSizeIntOp is
+    // folded to a ConstantOp.
+    auto getDimSize = [&](uint64_t i) -> Value {
+      Value dim =
+          rewriter.create<ConstantIntOp>(loc, rewriter.getI64IntegerAttr(i));
+      return rewriter.createOrFold<AtenSizeIntOp>(loc, inValue, dim);
+    };
+
+    auto inC = getDimSize(inRank - 3);
+    auto inH = getDimSize(inRank - 2);
+    auto inW = getDimSize(inRank - 1);
+
+    auto factor = op.getUpscaleFactor();
+
+    // Sizes of the three trailing output dimensions: C/(r*r), H*r and W*r.
+    Value factorSquared =
+        rewriter.createOrFold<AtenMulIntOp>(loc, factor, factor);
+    Value outC =
+        rewriter.createOrFold<AtenFloordivIntOp>(loc, inC, factorSquared);
+
+    Value outH = rewriter.createOrFold<AtenMulIntOp>(loc, inH, factor);
+    Value outW = rewriter.createOrFold<AtenMulIntOp>(loc, inW, factor);
+
+    // Shape of 'A' in the comment at the top.
+    SmallVector<Value> prePermuteShape;
+    prePermuteShape.reserve(nLeadingDims + 5);
+
+    // Shape of 'B' in the comment at the top.
+    SmallVector<Value> postPermuteShape;
+    postPermuteShape.reserve(nLeadingDims + 5);
+
+    SmallVector<Value> outShape; // Shape of 'X' in the comment at the top.
+    outShape.reserve(nLeadingDims + 3);
+
+    SmallVector<Value> permutation; // Dimension order used to get 'B'.
+    permutation.reserve(nLeadingDims + 5);
+
+    for (unsigned i = 0; i < nLeadingDims; ++i) { // Leading dims are kept.
+      auto dimensionAttr = rewriter.getI64IntegerAttr(i);
+      Value dimensionValue = rewriter.create<ConstantIntOp>(loc, dimensionAttr);
+      Value leadingDimSize =
+          rewriter.createOrFold<AtenSizeIntOp>(loc, inValue, dimensionValue);
+      prePermuteShape.push_back(leadingDimSize);
+      postPermuteShape.push_back(leadingDimSize);
+      outShape.push_back(leadingDimSize);
+      permutation.push_back(dimensionValue);
+
+    }
+
+    const auto inOptionalDType = inType.getOptionalDtype();
+    // Build the value tensor type whose sizes are given by a list of Values.
+    auto getTypeFromShape = [inOptionalDType](auto &&vals) {
+      // Map each Value to its constant int, or kUnknownSize if not constant.
+      auto getIntShape = [](auto &&vals) {
+        SmallVector<int64_t> shape;
+        shape.reserve(vals.size());
+        for (auto v : vals) {
+          int64_t cst_val;
+          if (matchPattern(v, m_TorchConstantInt(&cst_val))) {
+            shape.push_back(cst_val);
+          } else {
+            shape.push_back(kUnknownSize);
+          }
+        }
+        return shape;
+      };
+
+      const auto intShape = getIntShape(vals);
+      return ValueTensorType::get(vals[0].getContext(),
+                                  llvm::ArrayRef(intShape), inOptionalDType);
+    };
+
+    prePermuteShape.insert(prePermuteShape.end(),
+                           {outC, factor, factor, inH, inW});
+
+    postPermuteShape.insert(postPermuteShape.end(),
+                            {outC, inH, factor, inW, factor});
+
+    outShape.insert(outShape.end(), {outC, outH, outW});
+    // Order of the trailing (C, r, r, H, W) dims of 'A' that yields 'B'.
+    SmallVector<uint64_t> permutationTail{0, 3, 1, 4, 2};
+    for (uint64_t d : permutationTail) {
+      permutation.push_back(rewriter.create<ConstantIntOp>(
+          loc, rewriter.getI64IntegerAttr(nLeadingDims + d)));
+    }
+
+    auto listType = Torch::ListType::get(Torch::IntType::get(op.getContext()));
+
+    Value shapeA =
+        rewriter.create<PrimListConstructOp>(loc, listType, prePermuteShape);
+
+    Value A = rewriter.create<AtenReshapeOp>(
+        loc, getTypeFromShape(prePermuteShape), inValue, shapeA);
+
+    Value permuteDimsOrder = rewriter.create<PrimListConstructOp>(
+        loc, Torch::ListType::get(Torch::IntType::get(op->getContext())),
+        permutation);
+
+    Value B = rewriter.create<AtenPermuteOp>(
+        loc, getTypeFromShape(postPermuteShape), A, permuteDimsOrder);
+
+    Value outShapeList =
+        rewriter.create<PrimListConstructOp>(loc, listType, outShape);
+
+    auto deducedReturnType = getTypeFromShape(outShape);
+
+    rewriter.replaceOpWithNewOp<AtenReshapeOp>(op, deducedReturnType, B,
+                                               outShapeList);
+    return success();
+  }
+};
+} // namespace
+
 // ReLU6(x) = min(max(0, x), 6) = min(Relu(x), 6)
-static Value getRelu6Results(PatternRewriter &rewriter, Location loc,
-                             Value input) {
+static Value
+getRelu6Results(PatternRewriter &rewriter, Location loc, Value input) {
   BaseTensorType inputType = input.getType().cast<BaseTensorType>();
 
   Value relu = rewriter.create<AtenReluOp>(loc, inputType, input);
@@ -4717,8 +4892,7 @@ class DecomposePrimsSqueezeOp : public OpRewritePattern<PrimsSqueezeOp> {
       return rewriter.notifyMatchFailure(
           op, "all dimensions must be constant ints");
 
-    std::sort(dimensions.begin(), dimensions.end());
-    std::reverse(dimensions.begin(), dimensions.end());
+    std::sort(dimensions.rbegin(), dimensions.rend());
 
     if (dimensions.size() == 0) {
       rewriter.replaceOp(op, input);
@@ -5463,6 +5637,7 @@ class DecomposeComplexOpsPass
     addPatternIfTargetOpIsIllegal<DecomposeAtenSelectIntOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenMatmulOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenMvOp>(patterns);
+    addPatternIfTargetOpIsIllegal<DecomposeAtenPixelShuffleOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenTOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAten_LogSoftmaxBackwardDataOp>(
         patterns);

diff --git a/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp b/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp
@@ -391,6 +391,7 @@ static void markDecomposedOpsAsIllegal(MLIRContext *context,
   target.addIllegalOp<AtenNormScalarOptDimOp>();
   target.addIllegalOp<AtenSelectIntOp>();
   target.addIllegalOp<AtenMvOp>();
+  target.addIllegalOp<AtenPixelShuffleOp>();
   target.addIllegalOp<AtenTOp>();
   target.addIllegalOp<Aten_LogSoftmaxBackwardDataOp>();
   target.addDynamicallyLegalOp<AtenMatmulOp>([](AtenMatmulOp op) {

diff --git a/lib/Dialect/Torch/Utils/Utils.cpp b/lib/Dialect/Torch/Utils/Utils.cpp
@@ -206,7 +206,7 @@ bool Torch::isViewLikeOp(Operation *op) {
              TensorStaticInfoCastOp, AtenToDtypeLayoutOp, AtenNumpyTOp,
              AtenNarrowOp, AtenNarrowTensorOp, AtenToDeviceOp, PrimsSqueezeOp,
              AtenMovedimIntOp, PrimsViewOfOp, AtenRealOp, AtenImagOp,
-             AtenViewAsComplexOp, AtenViewAsRealOp>(op);
+             AtenViewAsComplexOp, AtenViewAsRealOp, AtenPixelShuffleOp>(op);
 }
 
 Value Torch::getConstantWithGivenDtypeAndValue(PatternRewriter &rewriter,

diff --git a/projects/pt1/e2e_testing/xfail_sets.py b/projects/pt1/e2e_testing/xfail_sets.py
@@ -941,6 +941,8 @@
 # Write the TOSA set as a "passing" set as it is very early in development
 # and very few tests work yet.
 TOSA_PASS_SET = {
+    "PixelShuffleModuleStatic_12_2_3_basic",
+    "PixelShuffleModuleStatic_3_18_2_2_basic",
     "IscloseStaticModule_basic",
     "IscloseStaticModuleTrue_basic",
     "TileBigDimsSizeModule_basic",
@@ -1350,6 +1352,8 @@
 }
 
 LTC_XFAIL_SET = {
+    "PixelShuffleModuleStatic_12_2_3_basic",
+    "PixelShuffleModuleStatic_3_18_2_2_basic",
     "_Convolution2DAllFalseModule_basic",
     "_Convolution2DBenchmarkModule_basic",
     "_Convolution2DCudnnModule_basic",