Skip to content

Commit 90757b5

Browse files
[HWToLLVM] Do not take illegal shortcut in array_get lowering. (#9172)
Currently, the LLVM lowering of `hw.array_get` will attempt to reuse pointers to array values to avoid rematerializing the array on the stack. This is only legal as long as we can ensure the underlying buffer remains unchanged. It works within the confines of the HWToLLVM lowering, which always allocates a new buffer when modifying an array. However, operations of other dialects (e.g., `arc.state_write`) might still write to the pointed-to buffer between the `load` and the `array_get`, causing it to return an incorrect value.
1 parent 0a4f107 commit 90757b5

File tree

2 files changed

+28
-24
lines changed

2 files changed

+28
-24
lines changed

lib/Conversion/HWToLLVM/HWToLLVM.cpp

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -210,23 +210,15 @@ struct ArrayGetOpConversion : public ConvertOpToLLVMPattern<hw::ArrayGetOp> {
210210
LogicalResult
211211
matchAndRewrite(hw::ArrayGetOp op, OpAdaptor adaptor,
212212
ConversionPatternRewriter &rewriter) const override {
213-
214-
Value arrPtr;
215-
if (auto load = adaptor.getInput().getDefiningOp<LLVM::LoadOp>()) {
216-
// In this case the array was loaded from an existing address, so we can
217-
// just grab that address instead of reallocating the array on the stack.
218-
arrPtr = load.getAddr();
219-
} else {
220-
auto oneC = LLVM::ConstantOp::create(
221-
rewriter, op->getLoc(), IntegerType::get(rewriter.getContext(), 32),
222-
rewriter.getI32IntegerAttr(1));
223-
arrPtr = LLVM::AllocaOp::create(
224-
rewriter, op->getLoc(),
225-
LLVM::LLVMPointerType::get(rewriter.getContext()),
226-
adaptor.getInput().getType(), oneC,
227-
/*alignment=*/4);
228-
LLVM::StoreOp::create(rewriter, op->getLoc(), adaptor.getInput(), arrPtr);
229-
}
213+
auto oneC = LLVM::ConstantOp::create(
214+
rewriter, op->getLoc(), IntegerType::get(rewriter.getContext(), 32),
215+
rewriter.getI32IntegerAttr(1));
216+
Value arrPtr = LLVM::AllocaOp::create(
217+
rewriter, op->getLoc(),
218+
LLVM::LLVMPointerType::get(rewriter.getContext()),
219+
adaptor.getInput().getType(), oneC,
220+
/*alignment=*/4);
221+
LLVM::StoreOp::create(rewriter, op->getLoc(), adaptor.getInput(), arrPtr);
230222

231223
auto arrTy = typeConverter->convertType(op.getInput().getType());
232224
auto elemTy = typeConverter->convertType(op.getResult().getType());

test/Conversion/HWToLLVM/convert_aggregates.mlir

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,6 @@ func.func @convertConstArray(%arg0 : i1, %arg1 : i32) {
151151
// CHECK-NEXT: %[[VAL_3:.*]] = llvm.load %[[VAL_2]] : !llvm.ptr -> !llvm.array<2 x i32>
152152
%0 = hw.aggregate_constant [0 : i32, 1 : i32] : !hw.array<2xi32>
153153

154-
// COM: Test: when the array argument is already a load from a pointer,
155-
// COM: then don't allocate on the stack again but take that pointer directly as a shortcut
156-
// CHECK-NEXT: %[[VAL_5:.*]] = llvm.zext %arg0 : i1 to i2
157-
// CHECK-NEXT: %[[VAL_6:.*]] = llvm.getelementptr %[[VAL_2]][0, %[[VAL_5]]] : (!llvm.ptr, i2) -> !llvm.ptr, !llvm.array<2 x i32>
158-
// CHECK-NEXT: %{{.+}} = llvm.load %[[VAL_6]] : !llvm.ptr -> i32
159-
%1 = hw.array_get %0[%arg0] : !hw.array<2xi32>, i1
160-
161154
// COM: Test: nested constant array can also converted to a constant global
162155
// CHECK: %[[VAL_7:.*]] = llvm.mlir.addressof @[[GLOB2]] : !llvm.ptr
163156
// CHECK-NEXT: %{{.+}} = llvm.load %[[VAL_7]] : !llvm.ptr -> !llvm.array<2 x array<2 x i32>>
@@ -237,3 +230,22 @@ func.func @nestedStructInject(%arg0: !hw.struct<a: i1, b: !hw.struct<c: i1>>, %a
237230
%0 = hw.struct_inject %arg0["b"], %arg1 : !hw.struct<a: i1, b: !hw.struct<c: i1>>
238231
return
239232
}
233+
234+
// CHECK-LABEL: @issue9171
235+
func.func @issue9171(%idx: i1) -> i32 {
236+
// CHECK: [[ARRPTR:%.+]] = llvm.mlir.addressof
237+
// CHECK: [[ARRVAL:%.+]] = llvm.load [[ARRPTR]]
238+
%cst = hw.aggregate_constant [0 : i32, 1 : i32] : !hw.array<2xi32>
239+
240+
// COM: Until we do proper RAW dependency checking array_get cannot assume
241+
// COM: the array's backing buffer to be immutable, so it has to rematerialize
242+
// COM: [[ARRVAL]] on the stack and must not reuse [[ARRPTR]].
243+
244+
// CHECK: [[ALLOCA:%.+]] = llvm.alloca
245+
// CHECK: llvm.store [[ARRVAL]], [[ALLOCA]]
246+
// CHECK: [[VALPTR:%.+]] = llvm.getelementptr [[ALLOCA]]
247+
// CHECK: [[RETVAL:%.+]] = llvm.load [[VALPTR]]
248+
// CHECK: return [[RETVAL]]
249+
%get = hw.array_get %cst[%idx] : !hw.array<2xi32>, i1
250+
return %get : i32
251+
}

0 commit comments

Comments
 (0)