diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index db969436a30712..d5c9ee41215ae9 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -633,8 +633,8 @@ void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc,
                                  SmallVectorImpl<Value> &out) {
   out.clear();
   out.reserve(stt.getDimRank());
-  for (const Size sh : stt.getDimShape()) {
-    const auto s = ShapedType::isDynamic(sh) ? 0 : sh;
+  for (const Size sz : stt.getDimShape()) {
+    const auto s = ShapedType::isDynamic(sz) ? 0 : sz;
     out.push_back(constantIndex(builder, loc, s));
   }
 }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 0364c9ac908c21..08c38394a46343 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -18,8 +18,6 @@
 #include "CodegenUtils.h"
 #include "SparseTensorDescriptor.h"
 
-#include "llvm/Support/FormatVariadic.h"
-
 #include "mlir/Dialect/Arith/Utils/Utils.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -116,31 +114,36 @@ static void allocSchemeForRank(OpBuilder &builder, Location loc,
   const SparseTensorType stt(desc.getRankedTensorType());
   Value linear = constantIndex(builder, loc, 1);
   const Level lvlRank = stt.getLvlRank();
-  for (Level l = startLvl; l < lvlRank; l++) {
-    const auto dlt = stt.getLvlType(l);
-    if (isCompressedDLT(dlt)) {
+  for (Level lvl = startLvl; lvl < lvlRank; lvl++) {
+    const auto dlt = stt.getLvlType(lvl);
+    if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt)) {
       // Append linear x positions, initialized to zero. Since each compressed
       // dimension initially already has a single zero entry, this maintains
-      // the desired "linear + 1" length property at all times.
+      // the desired "linear + 1" length property at all times. For loose
+      // compression, we multiply linear by two in order to append both the
+      // lo/hi positions.
       Value posZero = constantZero(builder, loc, stt.getPosType());
-      createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, l,
-                     posZero, linear);
+      if (isLooseCompressedDLT(dlt)) {
+        Value two = constantIndex(builder, loc, 2);
+        linear = builder.create<arith::MulIOp>(loc, linear, two);
+      }
+      createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl,
+                     /*value=*/posZero, /*repeat=*/linear);
       return;
-    }
-    if (isSingletonDLT(dlt)) {
+    } else if (isSingletonDLT(dlt) || is2OutOf4DLT(dlt)) {
       return; // nothing to do
     }
     // Keep compounding the size, but nothing needs to be initialized
     // at this level. We will eventually reach a compressed level or
     // otherwise the values array for the from-here "all-dense" case.
     assert(isDenseDLT(dlt));
-    Value size = desc.getLvlSize(builder, loc, l);
+    Value size = desc.getLvlSize(builder, loc, lvl);
     linear = builder.create<arith::MulIOp>(loc, linear, size);
   }
   // Reached values array so prepare for an insertion.
   Value valZero = constantZero(builder, loc, stt.getElementType());
   createPushback(builder, loc, desc, SparseTensorFieldKind::ValMemRef,
-                 std::nullopt, valZero, linear);
+                 std::nullopt, /*value=*/valZero, /*repeat=*/linear);
 }
 
 /// Creates allocation operation.
@@ -157,12 +160,9 @@ static Value createAllocation(OpBuilder &builder, Location loc,
 }
 
 /// Creates allocation for each field in sparse tensor type. Note that
-/// for all dynamic memrefs, the memory size is really the capacity of
-/// the "vector", while the actual size resides in the sizes array.
-///
-/// TODO: for efficiency, we will need heuristics to make educated guesses
-///       on the required capacities (see heuristic variable).
-///
+/// for all dynamic memrefs in the sparse tensor storage layout, the
+/// memory size is really the capacity of the "vector", while the actual
+/// size resides in the sizes array.
 static void createAllocFields(OpBuilder &builder, Location loc,
                               SparseTensorType stt, ValueRange dynSizes,
                               bool enableInit, SmallVectorImpl<Value> &fields,
@@ -206,6 +206,8 @@ static void createAllocFields(OpBuilder &builder, Location loc,
         constantIndex(builder, loc, 16);
   }
 
+  // Initializes all fields. An initial storage specifier and allocated
+  // positions/coordinates/values memrefs (with heuristic capacity).
   foreachFieldAndTypeInSparseTensor(
       stt,
       [&builder, &fields, stt, loc, posHeuristic, crdHeuristic, valHeuristic,
@@ -218,14 +220,16 @@ static void createAllocFields(OpBuilder &builder, Location loc,
           field = SparseTensorSpecifier::getInitValue(builder, loc, stt);
           break;
         case SparseTensorFieldKind::PosMemRef:
+          field = createAllocation(builder, loc, cast<MemRefType>(fType),
+                                   posHeuristic, enableInit);
+          break;
         case SparseTensorFieldKind::CrdMemRef:
+          field = createAllocation(builder, loc, cast<MemRefType>(fType),
+                                   crdHeuristic, enableInit);
+          break;
         case SparseTensorFieldKind::ValMemRef:
-          field = createAllocation(
-              builder, loc, cast<MemRefType>(fType),
-              (fKind == SparseTensorFieldKind::PosMemRef)   ? posHeuristic
-              : (fKind == SparseTensorFieldKind::CrdMemRef) ? crdHeuristic
-                                                            : valHeuristic,
-              enableInit);
+          field = createAllocation(builder, loc, cast<MemRefType>(fType),
+                                   valHeuristic, enableInit);
           break;
         }
         assert(field);
@@ -234,21 +238,19 @@ static void createAllocFields(OpBuilder &builder, Location loc,
         return true;
       });
 
+  // Initialize the storage scheme to an empty tensor. Sets the lvlSizes
+  // and gives all position fields an initial zero entry, so that it is
+  // easier to maintain the "linear + 1" length property.
   MutSparseTensorDescriptor desc(stt, fields);
-
-  // Initialize the storage scheme to an empty tensor. Initialized memSizes
-  // to all zeros, sets the dimSizes to known values and gives all position
-  // fields an initial zero entry, so that it is easier to maintain the
-  // "linear + 1" length property.
   Value posZero = constantZero(builder, loc, stt.getPosType());
-  for (Level lvlRank = stt.getLvlRank(), l = 0; l < lvlRank; l++) {
-    // Fills dim sizes array.
+  for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) {
     // FIXME: `toOrigDim` is deprecated.
-    desc.setLvlSize(builder, loc, l, dimSizes[toOrigDim(stt.getEncoding(), l)]);
-    // Pushes a leading zero to positions memref.
-    if (stt.isCompressedLvl(l))
-      createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, l,
-                     posZero);
+    desc.setLvlSize(builder, loc, lvl,
+                    dimSizes[toOrigDim(stt.getEncoding(), lvl)]);
+    const auto dlt = stt.getLvlType(lvl);
+    if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt))
+      createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl,
+                     /*value=*/posZero);
   }
   allocSchemeForRank(builder, loc, desc, /*rank=*/0);
 }
@@ -347,7 +349,7 @@ static Value genCompressed(OpBuilder &builder, Location loc,
   Value mszp1 = builder.create<arith::AddIOp>(loc, msz, one);
   genStore(builder, loc, mszp1, positionsAtLvl, pp1);
   createPushback(builder, loc, desc, SparseTensorFieldKind::CrdMemRef, lvl,
-                 lvlCoords[lvl]);
+                 /*value=*/lvlCoords[lvl]);
   // Prepare the next level "as needed".
   if ((lvl + 1) < lvlRank)
     allocSchemeForRank(builder, loc, desc, lvl + 1);
@@ -371,8 +373,6 @@ static void genEndInsert(OpBuilder &builder, Location loc,
   const Level lvlRank = stt.getLvlRank();
   for (Level l = 0; l < lvlRank; l++) {
     const auto dlt = stt.getLvlType(l);
-    if (isLooseCompressedDLT(dlt))
-      llvm_unreachable("TODO: Not yet implemented");
     if (isCompressedDLT(dlt)) {
       // Compressed dimensions need a position cleanup for all entries
       // that were not visited during the insertion pass.
@@ -407,7 +407,8 @@ static void genEndInsert(OpBuilder &builder, Location loc,
         builder.setInsertionPointAfter(loop);
       }
     } else {
-      assert(isDenseDLT(dlt) || isSingletonDLT(dlt));
+      assert(isDenseDLT(dlt) || isLooseCompressedDLT(dlt) ||
+             isSingletonDLT(dlt) || is2OutOf4DLT(dlt));
     }
   }
 }
@@ -483,33 +484,37 @@ class SparseInsertGenerator
     Value value = args.back();
     Value parentPos = constantZero(builder, loc, builder.getIndexType());
     // Generate code for every level.
-    for (Level l = 0; l < lvlRank; l++) {
-      const auto dlt = stt.getLvlType(l);
-      if (isCompressedDLT(dlt)) {
+    for (Level lvl = 0; lvl < lvlRank; lvl++) {
+      const auto dlt = stt.getLvlType(lvl);
+      if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt)) {
         // Create:
         //   if (!present) {
-        //     coordinates[l].push_back(coords[l])
-        //     <update positions and prepare level l + 1>
+        //     coordinates[lvl].push_back(coords[lvl])
+        //     <update positions and prepare level lvl + 1>
         //   }
-        //   positions[l] = coordinates.size() - 1
-        //   <insert @ positions[l] at next level l + 1>
+        //   positions[lvl] = coordinates.size() - 1
+        //   <insert @ positions[lvl] at next level lvl + 1>
+        if (isLooseCompressedDLT(dlt)) {
+          Value two = constantIndex(builder, loc, 2);
+          parentPos = builder.create<arith::MulIOp>(loc, parentPos, two);
+        }
         parentPos =
-            genCompressed(builder, loc, desc, coords, value, parentPos, l);
-      } else if (isSingletonDLT(dlt)) {
+            genCompressed(builder, loc, desc, coords, value, parentPos, lvl);
+      } else if (isSingletonDLT(dlt) || is2OutOf4DLT(dlt)) {
         // Create:
-        //   coordinates[l].push_back(coords[l])
-        //   positions[l] = positions[l-1]
-        //   <insert @ positions[l] at next level l + 1>
-        createPushback(builder, loc, desc, SparseTensorFieldKind::CrdMemRef, l,
-                       coords[l]);
+        //   coordinates[lvl].push_back(coords[lvl])
+        //   positions[lvl] = positions[lvl-1]
+        //   <insert @ positions[lvl] at next level lvl + 1>
+        createPushback(builder, loc, desc, SparseTensorFieldKind::CrdMemRef,
+                       lvl, /*value=*/coords[lvl]);
      } else {
         assert(isDenseDLT(dlt));
         // Construct the new position as:
-        //   positions[l] = size * positions[l-1] + coords[l]
-        //   <insert @ positions[l] at next level l + 1>
-        Value size = desc.getLvlSize(builder, loc, l);
+        //   positions[lvl] = size * positions[lvl-1] + coords[lvl]
+        //   <insert @ positions[lvl] at next level lvl + 1>
+        Value size = desc.getLvlSize(builder, loc, lvl);
         Value mult = builder.create<arith::MulIOp>(loc, size, parentPos);
-        parentPos = builder.create<arith::AddIOp>(loc, mult, coords[l]);
+        parentPos = builder.create<arith::AddIOp>(loc, mult, coords[lvl]);
       }
     }
     // Reached the actual value append/insert.
@@ -526,7 +531,6 @@ class SparseInsertGenerator
     //   <namePrefix>_<DLT>_<shape>_<ordering>_<eltType>_<crdWidth>_<posWidth>
     constexpr const char kInsertFuncNamePrefix[] = "_insert_";
     const SparseTensorType stt(llvm::cast<RankedTensorType>(rtp));
-
     SmallString<32> nameBuffer;
     llvm::raw_svector_ostream nameOstream(nameBuffer);
     nameOstream << kInsertFuncNamePrefix;
@@ -543,8 +547,8 @@ class SparseInsertGenerator
     // Static dim sizes are used in the generated code while dynamic sizes are
     // loaded from the dimSizes buffer. This is the reason for adding the shape
     // to the function name.
-    for (const auto sh : stt.getDimShape())
-      nameOstream << sh << "_";
+    for (const auto sz : stt.getDimShape())
+      nameOstream << sz << "_";
     // Permutation information is also used in generating insertion.
     if (!stt.isIdentity())
       nameOstream << stt.getDimToLvl() << "_";
@@ -607,7 +611,6 @@ class SparseCallConverter : public OpConversionPattern<func::CallOp> {
       assert(retOffset < newCall.getNumResults());
       auto retType = ret.getType();
       if (failed(typeConverter->convertType(retType, sparseFlat)))
-        // This should never happen.
        llvm_unreachable("Failed to convert type in sparse tensor codegen");
 
       // Converted types can not be empty when the type conversion succeed.
@@ -755,9 +758,7 @@ class SparseTensorAllocConverter
     const auto resType = getSparseTensorType(op);
     if (!resType.hasEncoding())
       return failure();
-
-    // Construct allocation for each field.
-    const Location loc = op.getLoc();
+    Location loc = op.getLoc();
     if (op.getCopy()) {
       auto desc = getDescriptorFromTensorTuple(adaptor.getCopy());
       SmallVector<Value> fields;
@@ -778,18 +779,18 @@ class SparseTensorAllocConverter
       return success();
     }
 
-    const Value sizeHint = op.getSizeHint();
-    const ValueRange dynSizes = adaptor.getDynamicSizes();
+    // Construct allocation for each field.
+    Value sizeHint = op.getSizeHint();
+    ValueRange dynSizes = adaptor.getDynamicSizes();
     const size_t found = dynSizes.size();
     const int64_t expected = resType.getNumDynamicDims();
     if (found != static_cast<size_t>(expected))
-      return rewriter.notifyMatchFailure(
-          op, llvm::formatv(
-                  "Got wrong number of dynamic sizes: Found={0}, Expected={1}",
-                  found, expected));
+      return rewriter.notifyMatchFailure(op,
+                                         "Got wrong number of dynamic sizes");
     SmallVector<Value> fields;
     createAllocFields(rewriter, loc, resType, dynSizes,
                       enableBufferInitialization, fields, sizeHint);
+
     // Replace operation with resulting memrefs.
     rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
     return success();
@@ -817,19 +818,18 @@ class SparseTensorEmptyConverter : public OpConversionPattern<tensor::EmptyOp> {
       return failure();
 
     // Construct allocation for each field.
-    const Location loc = op.getLoc();
-    const Value sizeHint; // none
+    Location loc = op.getLoc();
+    Value sizeHint; // none
     const ValueRange dynSizes = adaptor.getDynamicSizes();
     const size_t found = dynSizes.size();
     const int64_t expected = resType.getNumDynamicDims();
     if (found != static_cast<size_t>(expected))
-      return rewriter.notifyMatchFailure(
-          op, llvm::formatv(
-                  "Got wrong number of dynamic sizes: Found={0}, Expected={1}",
-                  found, expected));
+      return rewriter.notifyMatchFailure(op,
                                         "Got wrong number of dynamic sizes");
     SmallVector<Value> fields;
     createAllocFields(rewriter, loc, resType, dynSizes,
                       enableBufferInitialization, fields, sizeHint);
+
     // Replace operation with resulting memrefs.
     rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
     return success();
@@ -1496,7 +1496,6 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
     SmallVector<Value> fields;
     createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false,
                       fields, nse);
-    MutSparseTensorDescriptor desc(dstTp, fields);
 
     // Now construct the dim2lvl and lvl2dim buffers.
     Value dim2lvlBuffer;
@@ -1505,6 +1504,7 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
                   dim2lvlBuffer, lvl2dimBuffer);
 
     // Read the COO tensor data.
+    MutSparseTensorDescriptor desc(dstTp, fields);
     Value xs = desc.getAOSMemRef();
     Value ys = desc.getValMemRef();
     const Type boolTp = rewriter.getIntegerType(1);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index e9d4005feaee38..4fe9c59d8c320a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -380,14 +380,13 @@ class SparseTensorAllocConverter
   LogicalResult
   matchAndRewrite(bufferization::AllocTensorOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    if (op.getCopy())
-      return rewriter.notifyMatchFailure(op,
-                                         "sparse tensor copy not implemented");
-    Location loc = op.getLoc();
     const auto stt = getSparseTensorType(op);
     if (!stt.hasEncoding())
       return failure();
+    if (op.getCopy())
+      return rewriter.notifyMatchFailure(op, "alloc copy not implemented");
     // Gather all dimension sizes as SSA values.
+    Location loc = op.getLoc();
     const Dimension dimRank = stt.getDimRank();
     SmallVector<Value> dimSizes;
     dimSizes.reserve(dimRank);
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir
new file mode 100755
index 00000000000000..39d9b40439bcfb
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir
@@ -0,0 +1,61 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+//  do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+
+#CSR_hi = #sparse_tensor.encoding<{
+  map = (i, j) -> ( i : dense, j : loose_compressed)
+}>
+
+module {
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %f0 = arith.constant 0.0 : f64
+    %d = arith.constant dense<[[ 1.0,  2.0,  3.0,  4.0 ],
+                               [ 5.0,  6.0,  7.0,  8.0 ],
+                               [ 0.0,  0.0,  5.5,  0.0 ],
+                               [ 9.0, 10.0, 11.0, 12.0 ],
+                               [13.0, 14.0, 15.0, 16.0 ]]> : tensor<5x4xf64>
+    %s = sparse_tensor.convert %d : tensor<5x4xf64> to tensor<5x4xf64, #CSR_hi>
+
+    //
+    // CHECK: ( 0, 4, 4, 8, 8, 9, 9, 13 )
+    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 2, 0, 1, 2, 3, 0, 1, 2, 3 )
+    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 5.5, 9, 10, 11, 12, 13, 14, 15, 16 )
+    //
+    %pos = sparse_tensor.positions %s {level = 1 : index } : tensor<5x4xf64, #CSR_hi> to memref<?xindex>
+    %vecp = vector.transfer_read %pos[%c0], %c0 : memref<?xindex>, vector<8xindex>
+    vector.print %vecp : vector<8xindex>
+    %crd = sparse_tensor.coordinates %s {level = 1 : index } : tensor<5x4xf64, #CSR_hi> to memref<?xindex>
+    %vecc = vector.transfer_read %crd[%c0], %c0 : memref<?xindex>, vector<17xindex>
+    vector.print %vecc : vector<17xindex>
+    %val = sparse_tensor.values %s : tensor<5x4xf64, #CSR_hi> to memref<?xf64>
+    %vecv = vector.transfer_read %val[%c0], %f0 : memref<?xf64>, vector<17xf64>
+    vector.print %vecv : vector<17xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %s: tensor<5x4xf64, #CSR_hi>
+
+    return
+  }
+}
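
Note on the layout exercised by the test above (an illustrative sketch, not part of the patch): for a loose_compressed level the positions buffer holds a (lo, hi) pair per parent entry, rather than the single boundary of an ordinary compressed level, which is why the codegen multiplies linear and parentPos by two before touching it. A minimal standalone C++ sketch of reading one row back, assuming plain std::vector buffers that mirror the pos/crd/val memrefs printed by the test:

  #include <cstdint>
  #include <utility>
  #include <vector>

  // Row r of a (dense, loose_compressed) matrix owns the half-open segment
  // [pos[2*r], pos[2*r+1]) of the coordinates/values buffers.
  std::vector<std::pair<int64_t, double>>
  readRow(const std::vector<int64_t> &pos, const std::vector<int64_t> &crd,
          const std::vector<double> &val, int64_t r) {
    std::vector<std::pair<int64_t, double>> row;
    for (int64_t p = pos[2 * r], hi = pos[2 * r + 1]; p < hi; ++p)
      row.emplace_back(crd[p], val[p]); // (column, value)
    return row;
  }

With the buffers shown in the CHECK lines, readRow(pos, crd, val, 2) yields {(2, 5.5)}, the single nonzero of the third row of the dense input.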