[mlir][bufferization] Implement e2e IR transformation for static memory planner This adds the complete transformation pass that converts multiple memref.alloc/dealloc pairs into a single arena with subviews. The offset assignment is intentionally simple (just sequential) - this establishes the e2e pipeline so we can add smarter bin-packing later. Tests verify arena sizing, sequential offsets, and that dynamic shapes or missing deallocations are correctly skipped.
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/StaticMemoryPlannerAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/StaticMemoryPlannerAnalysis.cpp index 98803bc..dbf2661 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/StaticMemoryPlannerAnalysis.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/StaticMemoryPlannerAnalysis.cpp
@@ -1,4 +1,4 @@ -//===- StaticMemoryPlannerAnalysis.cpp - Analysis for static memory -------===// +//===- StaticMemoryPlannerAnalysis.cpp - Static memory planning -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,23 +6,21 @@ // //===----------------------------------------------------------------------===// // -// Discovers same-block memref.alloc/memref.dealloc pairs and analyzes their -// lifetime relationships to identify opportunities for memory reuse. -// -// This pass performs basic structural analysis without computing actual memory -// layouts. It reports which allocations are eligible for static planning and -// whether pairs of allocations have non-overlapping lifetimes (can reuse). +// Transforms memref.alloc/memref.dealloc pairs into a single arena allocation +// with subviews. Uses simple sequential offset assignment where each allocation +// gets its own space without overlap (baseline algorithm for e2e testing). // //===----------------------------------------------------------------------===// #include "mlir/Dialect/Bufferization/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/Builders.h" #include "mlir/IR/Operation.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/Support/Debug.h" -#define DEBUG_TYPE "static-memory-planner-analysis" +#define DEBUG_TYPE "static-memory-planner" namespace mlir { namespace bufferization { @@ -31,18 +29,18 @@ } // namespace bufferization } // namespace mlir -using namespace mlir; - namespace { //===----------------------------------------------------------------------===// // Data structures //===----------------------------------------------------------------------===// -/// A candidate allocation with its matching deallocation. +/// A candidate allocation with its matching deallocation and assigned offset. struct AllocationCandidate { - memref::AllocOp alloc; - memref::DeallocOp dealloc; + mlir::memref::AllocOp alloc; + mlir::memref::DeallocOp dealloc; + int64_t offset = 0; // Offset in elements from arena start + int64_t sizeInElements = 0; // Size in elements }; //===----------------------------------------------------------------------===// @@ -51,10 +49,10 @@ /// Finds the unique dealloc operation for a given alloc value. /// Returns nullptr if there are zero or multiple deallocs. -static memref::DeallocOp findUniqueDealloc(Value allocValue) { - memref::DeallocOp deallocOp = nullptr; - for (Operation *user : allocValue.getUsers()) { - if (auto dealloc = dyn_cast<memref::DeallocOp>(user)) { +static mlir::memref::DeallocOp findUniqueDealloc(mlir::Value allocValue) { + mlir::memref::DeallocOp deallocOp = nullptr; + for (mlir::Operation *user : allocValue.getUsers()) { + if (auto dealloc = mlir::dyn_cast<mlir::memref::DeallocOp>(user)) { if (deallocOp) return nullptr; // Multiple deallocs found deallocOp = dealloc; @@ -63,29 +61,12 @@ return deallocOp; } -/// Checks if two allocation candidates have non-overlapping lifetimes. -/// Returns true if the first's dealloc is strictly before the second's alloc, -/// or vice versa. -static bool canReuseMemory(const AllocationCandidate &first, - const AllocationCandidate &second) { - Operation *firstDealloc = first.dealloc; - Operation *firstAlloc = first.alloc; - Operation *secondDealloc = second.dealloc; - Operation *secondAlloc = second.alloc; - - // Check if both are in the same block - if (firstAlloc->getBlock() != secondAlloc->getBlock()) - return false; - - // Check if first ends before second starts - if (firstDealloc->isBeforeInBlock(secondAlloc)) - return true; - - // Check if second ends before first starts - if (secondDealloc->isBeforeInBlock(firstAlloc)) - return true; - - return false; +/// Compute the number of elements in a static-shape memref. +static int64_t computeSizeInElements(mlir::MemRefType memrefType) { + int64_t size = 1; + for (int64_t dim : memrefType.getShape()) + size *= dim; + return size; } //===----------------------------------------------------------------------===// @@ -93,67 +74,97 @@ //===----------------------------------------------------------------------===// struct StaticMemoryPlannerAnalysisPass - : public bufferization::impl::StaticMemoryPlannerAnalysisPassBase< + : public mlir::bufferization::impl::StaticMemoryPlannerAnalysisPassBase< StaticMemoryPlannerAnalysisPass> { public: StaticMemoryPlannerAnalysisPass() = default; void runOnOperation() override { - Operation *op = getOperation(); + mlir::Operation *op = getOperation(); - // Collect eligible allocation candidates - SmallVector<AllocationCandidate> candidates; + // Step 1: Collect eligible allocation candidates + llvm::SmallVector<AllocationCandidate> candidates; - op->walk([&](memref::AllocOp allocOp) { - MemRefType memrefType = allocOp.getType(); + op->walk([&](mlir::memref::AllocOp allocOp) { + mlir::MemRefType memrefType = allocOp.getType(); // Skip dynamic shapes if (!memrefType.hasStaticShape()) { ++numSkipDynamic; - allocOp.emitRemark("static-memory-planner: skip: dynamic shape"); return; } // Find unique dealloc in the same block - memref::DeallocOp deallocOp = findUniqueDealloc(allocOp.getResult()); + mlir::memref::DeallocOp deallocOp = findUniqueDealloc(allocOp.getResult()); if (!deallocOp) { ++numSkipNoDealloc; - allocOp.emitRemark( - "static-memory-planner: skip: no unique dealloc"); return; } if (deallocOp->getBlock() != allocOp->getBlock()) { ++numSkipNoDealloc; - allocOp.emitRemark( - "static-memory-planner: skip: dealloc in different block"); return; } // This allocation is eligible ++numEligible; - allocOp.emitRemark("static-memory-planner: eligible"); - candidates.push_back({allocOp, deallocOp}); + AllocationCandidate candidate; + candidate.alloc = allocOp; + candidate.dealloc = deallocOp; + candidate.sizeInElements = computeSizeInElements(memrefType); + candidates.push_back(candidate); }); - // Analyze reuse opportunities between pairs of candidates - unsigned numReusable = 0; - for (size_t i = 0; i < candidates.size(); ++i) { - for (size_t j = i + 1; j < candidates.size(); ++j) { - if (canReuseMemory(candidates[i], candidates[j])) { - ++numReusable; - LLVM_DEBUG(llvm::dbgs() - << "[static-memory-planner] reuse opportunity: alloc " - << i << " and alloc " << j << "\n"); - } - } + if (candidates.empty()) + return; + + // Step 2: Compute simple sequential offsets (no overlap optimization) + int64_t totalSize = 0; + for (auto &candidate : candidates) { + candidate.offset = totalSize; + totalSize += candidate.sizeInElements; + LLVM_DEBUG(llvm::dbgs() << "[static-memory-planner] offset=" + << candidate.offset + << " size=" << candidate.sizeInElements << "\n"); } - LLVM_DEBUG(llvm::dbgs() << "[static-memory-planner] summary: eligible=" - << (unsigned)numEligible << " skip-dynamic=" - << (unsigned)numSkipDynamic << " skip-no-dealloc=" - << (unsigned)numSkipNoDealloc << " reusable-pairs=" - << numReusable << "\n"); + // Step 3: Find the first allocation's location to place arena + mlir::Operation *firstAlloc = candidates.front().alloc; + mlir::OpBuilder builder(firstAlloc); + + // Get element type from first allocation (assume all same type for now) + mlir::Type elementType = candidates.front().alloc.getType().getElementType(); + + // Step 4: Create arena allocation + auto arenaType = mlir::MemRefType::get({totalSize}, elementType); + auto arenaAlloc = mlir::memref::AllocOp::create(builder, firstAlloc->getLoc(), arenaType); + + LLVM_DEBUG(llvm::dbgs() << "[static-memory-planner] created arena: size=" + << totalSize << " elements\n"); + + // Step 5: Replace each alloc with a subview and remove deallocs + for (auto &candidate : candidates) { + mlir::OpBuilder subviewBuilder(candidate.alloc); + + // Create subview into arena + llvm::SmallVector<mlir::OpFoldResult> offsets, sizes, strides; + + // Single offset into flat arena + offsets.push_back(subviewBuilder.getIndexAttr(candidate.offset)); + sizes.push_back(subviewBuilder.getIndexAttr(candidate.sizeInElements)); + strides.push_back(subviewBuilder.getIndexAttr(1)); + + auto subview = mlir::memref::SubViewOp::create( + subviewBuilder, candidate.alloc.getLoc(), arenaAlloc.getResult(), + offsets, sizes, strides); + + // Replace all uses of the original alloc + candidate.alloc.getResult().replaceAllUsesWith(subview.getResult()); + + // Remove the original alloc and dealloc + candidate.alloc.erase(); + candidate.dealloc.erase(); + } } };
diff --git a/mlir/test/Dialect/Bufferization/Transforms/static-memory-planner-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/static-memory-planner-analysis.mlir index a8c7c72..1cde192 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/static-memory-planner-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/static-memory-planner-analysis.mlir
@@ -1,14 +1,18 @@ // RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(static-memory-planner-analysis))" \ -// RUN: -split-input-file -verify-diagnostics +// RUN: -split-input-file | FileCheck %s // ----- // Test 1: Simple sequential alloc/dealloc pairs +// CHECK-LABEL: func @simple_sequential func.func @simple_sequential() { - // expected-remark @below {{static-memory-planner: eligible}} + // CHECK: %[[ARENA:.*]] = memref.alloc() : memref<1536xf32> + // CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[ARENA]][0] [1024] [1] : memref<1536xf32> to memref<1024xf32, strided<[1]>> + // CHECK-NEXT: %[[SUBVIEW1:.*]] = memref.subview %[[ARENA]][1024] [512] [1] : memref<1536xf32> to memref<512xf32, strided<[1], offset: 1024>> + // CHECK-NOT: memref.alloc + // CHECK-NOT: memref.dealloc %alloc0 = memref.alloc() : memref<1024xf32> memref.dealloc %alloc0 : memref<1024xf32> - // expected-remark @below {{static-memory-planner: eligible}} %alloc1 = memref.alloc() : memref<512xf32> memref.dealloc %alloc1 : memref<512xf32> return @@ -16,9 +20,11 @@ // ----- -// Test 2: Dynamic shape - should be skipped +// Test 2: Dynamic shape - should be skipped (no transformation) +// CHECK-LABEL: func @dynamic_shape_skipped func.func @dynamic_shape_skipped(%n: index) { - // expected-remark @below {{static-memory-planner: skip: dynamic shape}} + // CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) : memref<?xf32> + // CHECK-NOT: memref.subview %alloc = memref.alloc(%n) : memref<?xf32> return } @@ -26,8 +32,10 @@ // ----- // Test 3: No dealloc - should be skipped +// CHECK-LABEL: func @no_dealloc_skipped func.func @no_dealloc_skipped() { - // expected-remark @below {{static-memory-planner: skip: no unique dealloc}} + // CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1024xf32> + // CHECK-NOT: memref.subview %alloc = memref.alloc() : memref<1024xf32> return } @@ -35,8 +43,12 @@ // ----- // Test 4: Dealloc in different block - should be skipped +// CHECK-LABEL: func @different_block_skipped func.func @different_block_skipped(%cond: i1) { - // expected-remark @below {{static-memory-planner: skip: dealloc in different block}} + // CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1024xf32> + // CHECK: scf.if + // CHECK: memref.dealloc %[[ALLOC]] + // CHECK-NOT: memref.subview %alloc = memref.alloc() : memref<1024xf32> scf.if %cond { memref.dealloc %alloc : memref<1024xf32> @@ -47,11 +59,15 @@ // ----- -// Test 5: Overlapping lifetimes +// Test 5: Overlapping lifetimes (both eligible, sequential offsets) +// CHECK-LABEL: func @overlapping_lifetimes func.func @overlapping_lifetimes() { - // expected-remark @below {{static-memory-planner: eligible}} + // CHECK: %[[ARENA:.*]] = memref.alloc() : memref<1536xf32> + // CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[ARENA]][0] [512] [1] : memref<1536xf32> to memref<512xf32, strided<[1]>> + // CHECK-NEXT: %[[SUBVIEW1:.*]] = memref.subview %[[ARENA]][512] [1024] [1] : memref<1536xf32> to memref<1024xf32, strided<[1], offset: 512>> + // CHECK-NOT: memref.alloc + // CHECK-NOT: memref.dealloc %alloc0 = memref.alloc() : memref<512xf32> - // expected-remark @below {{static-memory-planner: eligible}} %alloc1 = memref.alloc() : memref<1024xf32> memref.dealloc %alloc1 : memref<1024xf32> memref.dealloc %alloc0 : memref<512xf32> @@ -60,15 +76,19 @@ // ----- -// Test 6: Multiple allocations with non-overlapping lifetimes -func.func @multiple_reusable() { - // expected-remark @below {{static-memory-planner: eligible}} +// Test 6: Multiple allocations with sequential offsets +// CHECK-LABEL: func @multiple_sequential +func.func @multiple_sequential() { + // CHECK: %[[ARENA:.*]] = memref.alloc() : memref<3584xf32> + // CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[ARENA]][0] [1024] [1] : memref<3584xf32> to memref<1024xf32, strided<[1]>> + // CHECK-NEXT: %[[SUBVIEW1:.*]] = memref.subview %[[ARENA]][1024] [512] [1] : memref<3584xf32> to memref<512xf32, strided<[1], offset: 1024>> + // CHECK-NEXT: %[[SUBVIEW2:.*]] = memref.subview %[[ARENA]][1536] [2048] [1] : memref<3584xf32> to memref<2048xf32, strided<[1], offset: 1536>> + // CHECK-NOT: memref.alloc + // CHECK-NOT: memref.dealloc %alloc0 = memref.alloc() : memref<1024xf32> memref.dealloc %alloc0 : memref<1024xf32> - // expected-remark @below {{static-memory-planner: eligible}} %alloc1 = memref.alloc() : memref<512xf32> memref.dealloc %alloc1 : memref<512xf32> - // expected-remark @below {{static-memory-planner: eligible}} %alloc2 = memref.alloc() : memref<2048xf32> memref.dealloc %alloc2 : memref<2048xf32> return