Auto merge of #57655 - mtak-:fix-tls-dtors-macos, r=alexcrichton

OSX: fix #57534 registering thread dtors while running thread dtors

r? @alexcrichton

- "fast" `thread_local` destructors get run even on the main thread
- "fast" `thread_local` dtors, can initialize other `thread_local`'s

One corner case where this fix doesn't work, is when a C++ `thread_local` triggers the initialization of a rust `thread_local`.

I did not add any std::thread specific flag to indicate that the thread is currently exiting, which would be checked before registering a new dtor (I didn't really know where to stick that). I think this does the trick tho!

Let me know if anything needs tweaking/fixing/etc.

resolves this for macos: https://github.com/rust-lang/rust/issues/28129
fixes: https://github.com/rust-lang/rust/issues/57534
diff --git a/src/libstd/sys/unix/fast_thread_local.rs b/src/libstd/sys/unix/fast_thread_local.rs
index d48d701d..742ffd1 100644
--- a/src/libstd/sys/unix/fast_thread_local.rs
+++ b/src/libstd/sys/unix/fast_thread_local.rs
@@ -33,30 +33,57 @@
     register_dtor_fallback(t, dtor);
 }
 
-// macOS's analog of the above linux function is this _tlv_atexit function.
-// The disassembly of thread_local globals in C++ (at least produced by
-// clang) will have this show up in the output.
+// This implementation is very similar to register_dtor_fallback in
+// sys_common/thread_local.rs. The main difference is that we want to hook into
+// macOS's analog of the above linux function, _tlv_atexit. OSX will run the
+// registered dtors before any TLS slots get freed, and when the main thread
+// exits.
+//
+// Unfortunately, calling _tlv_atexit while tls dtors are running is UB. The
+// workaround below is to register, via _tlv_atexit, a custom DTOR list once per
+// thread. thread_local dtors are pushed to the DTOR list without calling
+// _tlv_atexit.
 #[cfg(target_os = "macos")]
 pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern fn(*mut u8)) {
+    use cell::Cell;
+    use ptr;
+
+    #[thread_local]
+    static REGISTERED: Cell<bool> = Cell::new(false);
+    if !REGISTERED.get() {
+        _tlv_atexit(run_dtors, ptr::null_mut());
+        REGISTERED.set(true);
+    }
+
+    type List = Vec<(*mut u8, unsafe extern fn(*mut u8))>;
+
+    #[thread_local]
+    static DTORS: Cell<*mut List> = Cell::new(ptr::null_mut());
+    if DTORS.get().is_null() {
+        let v: Box<List> = box Vec::new();
+        DTORS.set(Box::into_raw(v));
+    }
+
     extern {
         fn _tlv_atexit(dtor: unsafe extern fn(*mut u8),
                        arg: *mut u8);
     }
-    _tlv_atexit(dtor, t);
+
+    let list: &mut List = &mut *DTORS.get();
+    list.push((t, dtor));
+
+    unsafe extern fn run_dtors(_: *mut u8) {
+        let mut ptr = DTORS.replace(ptr::null_mut());
+        while !ptr.is_null() {
+            let list = Box::from_raw(ptr);
+            for (ptr, dtor) in list.into_iter() {
+                dtor(ptr);
+            }
+            ptr = DTORS.replace(ptr::null_mut());
+        }
+    }
 }
 
 pub fn requires_move_before_drop() -> bool {
-    // The macOS implementation of TLS apparently had an odd aspect to it
-    // where the pointer we have may be overwritten while this destructor
-    // is running. Specifically if a TLS destructor re-accesses TLS it may
-    // trigger a re-initialization of all TLS variables, paving over at
-    // least some destroyed ones with initial values.
-    //
-    // This means that if we drop a TLS value in place on macOS that we could
-    // revert the value to its original state halfway through the
-    // destructor, which would be bad!
-    //
-    // Hence, we use `ptr::read` on macOS (to move to a "safe" location)
-    // instead of drop_in_place.
-    cfg!(target_os = "macos")
+    false
 }
diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs
index efd231e..5d2eb5f 100644
--- a/src/libstd/thread/local.rs
+++ b/src/libstd/thread/local.rs
@@ -69,9 +69,6 @@
 ///    destroyed, but not all platforms have this guard. Those platforms that do
 ///    not guard typically have a synthetic limit after which point no more
 ///    destructors are run.
-/// 3. On macOS, initializing TLS during destruction of other TLS slots can
-///    sometimes cancel *all* destructors for the current thread, whether or not
-///    the slots have already had their destructors run or not.
 ///
 /// [`with`]: ../../std/thread/struct.LocalKey.html#method.with
 /// [`thread_local!`]: ../../std/macro.thread_local.html
@@ -604,11 +601,8 @@
     }
 
     // Note that this test will deadlock if TLS destructors aren't run (this
-    // requires the destructor to be run to pass the test). macOS has a known bug
-    // where dtors-in-dtors may cancel other destructors, so we just ignore this
-    // test on macOS.
+    // requires the destructor to be run to pass the test).
     #[test]
-    #[cfg_attr(target_os = "macos", ignore)]
     fn dtors_in_dtors_in_dtors() {
         struct S1(Sender<()>);
         thread_local!(static K1: UnsafeCell<Option<S1>> = UnsafeCell::new(None));