Auto merge of #131460 - jwong101:default-placement-new, r=ibraheemdev
Optimize `Box::default` and `Arc::default` to construct more types in place
Both the `Arc` and `Box` `Default` impls currently call `T::default()` before allocating, and then moving the resulting `T` into the allocation.
Most `Default` impls are trivial, which should in theory allow
LLVM to construct `T: Default` directly in the `Box` allocation when calling
`<Box<T>>::default()`.
However, the allocation may fail, which necessitates calling `T`'s destructor if it has one.
If the destructor is non-trivial, then LLVM has a hard time proving that it's
sound to elide, which makes it construct `T` on the stack first, and then copy it into the allocation.
Change both of these impls to allocate first, and then call `T::default` into the uninitialized allocation, so that LLVM doesn't have to prove that it's sound to elide the destructor/initial stack copy.
For example, given the following Rust code:
```rust
#[derive(Default, Clone)]
struct Foo {
x: Vec<u8>,
z: String,
y: Vec<u8>,
}
#[no_mangle]
pub fn src() -> Box<Foo> {
Box::default()
}
```
<details open>
<summary>Before this PR:</summary>
```llvm
`@__rust_no_alloc_shim_is_unstable` = external global i8
; drop_in_place() generated in case the allocation fails
; core::ptr::drop_in_place<playground::Foo>
; Function Attrs: nounwind nonlazybind uwtable
define internal fastcc void `@"_ZN4core3ptr36drop_in_place$LT$playground..Foo$GT$17hff376aece491233bE"(ptr` noalias nocapture noundef readonly align 8 dereferenceable(72) %_1) unnamed_addr #0 personality ptr `@rust_eh_personality` {
start:
%_1.val = load i64, ptr %_1, align 8
%0 = icmp eq i64 %_1.val, 0
br i1 %0, label %bb6, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i"
"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i": ; preds = %start
%1 = getelementptr inbounds i8, ptr %_1, i64 8
%_1.val6 = load ptr, ptr %1, align 8, !nonnull !3, !noundef !3
tail call void `@__rust_dealloc(ptr` noundef nonnull %_1.val6, i64 noundef %_1.val, i64 noundef 1) #8
br label %bb6
bb6: ; preds = %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i", %start
%2 = getelementptr inbounds i8, ptr %_1, i64 24
%.val9 = load i64, ptr %2, align 8
%3 = icmp eq i64 %.val9, 0
br i1 %3, label %bb5, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11"
"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11": ; preds = %bb6
%4 = getelementptr inbounds i8, ptr %_1, i64 32
%.val10 = load ptr, ptr %4, align 8, !nonnull !3, !noundef !3
tail call void `@__rust_dealloc(ptr` noundef nonnull %.val10, i64 noundef %.val9, i64 noundef 1) #8
br label %bb5
bb5: ; preds = %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11", %bb6
%5 = getelementptr inbounds i8, ptr %_1, i64 48
%.val4 = load i64, ptr %5, align 8
%6 = icmp eq i64 %.val4, 0
br i1 %6, label %"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16", label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15"
"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15": ; preds = %bb5
%7 = getelementptr inbounds i8, ptr %_1, i64 56
%.val5 = load ptr, ptr %7, align 8, !nonnull !3, !noundef !3
tail call void `@__rust_dealloc(ptr` noundef nonnull %.val5, i64 noundef %.val4, i64 noundef 1) #8
br label %"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16"
"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16": ; preds = %bb5, %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15"
ret void
}
; Function Attrs: nonlazybind uwtable
define noalias noundef nonnull align 8 ptr `@src()` unnamed_addr #1 personality ptr `@rust_eh_personality` {
start:
; alloca to place `Foo` in.
%_1 = alloca [72 x i8], align 8
call void `@llvm.lifetime.start.p0(i64` 72, ptr nonnull %_1)
store i64 0, ptr %_1, align 8
%_2.sroa.4.0._1.sroa_idx = getelementptr inbounds i8, ptr %_1, i64 8
store ptr inttoptr (i64 1 to ptr), ptr %_2.sroa.4.0._1.sroa_idx, align 8
%_2.sroa.5.0._1.sroa_idx = getelementptr inbounds i8, ptr %_1, i64 16
%_3.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 32
call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_2.sroa.5.0._1.sroa_idx, i8 0, i64 16, i1 false)
store ptr inttoptr (i64 1 to ptr), ptr %_3.sroa.4.0..sroa_idx, align 8
%_3.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 40
%_4.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 56
call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_3.sroa.5.0..sroa_idx, i8 0, i64 16, i1 false)
store ptr inttoptr (i64 1 to ptr), ptr %_4.sroa.4.0..sroa_idx, align 8
%_4.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 64
store i64 0, ptr %_4.sroa.5.0..sroa_idx, align 8
%0 = load volatile i8, ptr `@__rust_no_alloc_shim_is_unstable,` align 1, !noalias !4
%_0.i.i.i = tail call noalias noundef align 8 dereferenceable_or_null(72) ptr `@__rust_alloc(i64` noundef 72, i64 noundef 8) #8, !noalias !4
%1 = icmp eq ptr %_0.i.i.i, null
br i1 %1, label %bb2.i, label %"_ZN5alloc5boxed12Box$LT$T$GT$3new17h0864de14f863a27aE.exit"
bb2.i: ; preds = %start
; invoke alloc::alloc::handle_alloc_error
invoke void `@_ZN5alloc5alloc18handle_alloc_error17h98142d0d8d74161bE(i64` noundef 8, i64 noundef 72) #9
to label %.noexc unwind label %cleanup.i
.noexc: ; preds = %bb2.i
unreachable
cleanup.i: ; preds = %bb2.i
%2 = landingpad { ptr, i32 }
cleanup
; call core::ptr::drop_in_place<playground::Foo>
call fastcc void `@"_ZN4core3ptr36drop_in_place$LT$playground..Foo$GT$17hff376aece491233bE"(ptr` noalias noundef nonnull align 8 dereferenceable(72) %_1) #10
resume { ptr, i32 } %2
"_ZN5alloc5boxed12Box$LT$T$GT$3new17h0864de14f863a27aE.exit": ; preds = %start
; Copy from stack to heap if allocation is successful
call void `@llvm.memcpy.p0.p0.i64(ptr` noundef nonnull align 8 dereferenceable(72) %_0.i.i.i, ptr noundef nonnull align 8 dereferenceable(72) %_1, i64 72, i1 false)
call void `@llvm.lifetime.end.p0(i64` 72, ptr nonnull %_1)
ret ptr %_0.i.i.i
}
```
</details>
<details>
<summary>After this PR</summary>
```llvm
; Notice how there's no `drop_in_place()` generated as well
define noalias noundef nonnull align 8 ptr `@src()` unnamed_addr #0 personality ptr `@rust_eh_personality` {
start:
; no stack allocation
%0 = load volatile i8, ptr `@__rust_no_alloc_shim_is_unstable,` align 1
%_0.i.i.i.i.i = tail call noalias noundef align 8 dereferenceable_or_null(72) ptr `@__rust_alloc(i64` noundef 72, i64 noundef 8) #5
%1 = icmp eq ptr %_0.i.i.i.i.i, null
br i1 %1, label %bb3.i, label %"_ZN5alloc5boxed16Box$LT$T$C$A$GT$13new_uninit_in17h80d6355ef4b73ea3E.exit"
bb3.i: ; preds = %start
; call alloc::alloc::handle_alloc_error
tail call void `@_ZN5alloc5alloc18handle_alloc_error17h98142d0d8d74161bE(i64` noundef 8, i64 noundef 72) #6
unreachable
"_ZN5alloc5boxed16Box$LT$T$C$A$GT$13new_uninit_in17h80d6355ef4b73ea3E.exit": ; preds = %start
; construct `Foo` directly into the allocation if successful
store i64 0, ptr %_0.i.i.i.i.i, align 8
%_8.sroa.4.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 8
store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.4.0._1.sroa_idx, align 8
%_8.sroa.5.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 16
%_8.sroa.7.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 32
tail call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_8.sroa.5.0._1.sroa_idx, i8 0, i64 16, i1 false)
store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.7.0._1.sroa_idx, align 8
%_8.sroa.8.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 40
%_8.sroa.10.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 56
tail call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_8.sroa.8.0._1.sroa_idx, i8 0, i64 16, i1 false)
store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.10.0._1.sroa_idx, align 8
%_8.sroa.11.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 64
store i64 0, ptr %_8.sroa.11.0._1.sroa_idx, align 8
ret ptr %_0.i.i.i.i.i
}
```
</details>
diff --git a/library/alloc/src/boxed.rs b/library/alloc/src/boxed.rs
index 5f20729..3e79141 100644
--- a/library/alloc/src/boxed.rs
+++ b/library/alloc/src/boxed.rs
@@ -1688,7 +1688,7 @@
/// Creates a `Box<T>`, with the `Default` value for T.
#[inline]
fn default() -> Self {
- Box::new(T::default())
+ Box::write(Box::new_uninit(), T::default())
}
}
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
index 50bf385..fa4bb78 100644
--- a/library/alloc/src/lib.rs
+++ b/library/alloc/src/lib.rs
@@ -104,6 +104,7 @@
#![feature(async_closure)]
#![feature(async_fn_traits)]
#![feature(async_iterator)]
+#![feature(box_uninit_write)]
#![feature(clone_to_uninit)]
#![feature(coerce_unsized)]
#![feature(const_align_of_val)]
diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs
index 4632f99..acbc325 100644
--- a/library/alloc/src/sync.rs
+++ b/library/alloc/src/sync.rs
@@ -3447,7 +3447,13 @@
/// assert_eq!(*x, 0);
/// ```
fn default() -> Arc<T> {
- Arc::new(Default::default())
+ let x = Box::into_raw(Box::write(Box::new_uninit(), ArcInner {
+ strong: atomic::AtomicUsize::new(1),
+ weak: atomic::AtomicUsize::new(1),
+ data: T::default(),
+ }));
+ // SAFETY: `Box::into_raw` consumes the `Box` and never returns null
+ unsafe { Self::from_inner(NonNull::new_unchecked(x)) }
}
}
diff --git a/tests/codegen/placement-new.rs b/tests/codegen/placement-new.rs
new file mode 100644
index 0000000..edb25df
--- /dev/null
+++ b/tests/codegen/placement-new.rs
@@ -0,0 +1,27 @@
+//@ compile-flags: -O
+#![crate_type = "lib"]
+
+// Test to check that types with "complex" destructors, but trivial `Default` impls
+// are constructed directly into the allocation in `Box::default` and `Arc::default`.
+
+use std::sync::Arc;
+
+// CHECK-LABEL: @box_default_inplace
+#[no_mangle]
+pub fn box_default_inplace() -> Box<(String, String)> {
+ // CHECK-NOT: alloca
+ // CHECK: [[BOX:%.*]] = {{.*}}call {{.*}}__rust_alloc(
+ // CHECK-NOT: call void @llvm.memcpy
+ // CHECK: ret ptr [[BOX]]
+ Box::default()
+}
+
+// CHECK-LABEL: @arc_default_inplace
+#[no_mangle]
+pub fn arc_default_inplace() -> Arc<(String, String)> {
+ // CHECK-NOT: alloca
+ // CHECK: [[ARC:%.*]] = {{.*}}call {{.*}}__rust_alloc(
+ // CHECK-NOT: call void @llvm.memcpy
+ // CHECK: ret ptr [[ARC]]
+ Arc::default()
+}